aboutsummaryrefslogtreecommitdiffstats
path: root/notes
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-11-14 18:36:33 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-11-14 18:36:33 -0800
commit: 6bcd62005dd7eab94744f5f368d4724732bcfbd9 (patch)
tree: 5593a613460019e1fe54ab445eb2104fd9381cbb /notes
parent: 7634f6ecf2361b1cb1cafd4e27fd1fb84d81d130 (diff)
download: fatcat-6bcd62005dd7eab94744f5f368d4724732bcfbd9.tar.gz
fatcat-6bcd62005dd7eab94744f5f368d4724732bcfbd9.zip
bunch of notes on CSL alignment and types
Diffstat (limited to 'notes')
-rw-r--r-- notes/schema/alignments.csv 79
-rw-r--r-- notes/schema/alignments.txt 21
-rw-r--r-- notes/schema/contrib_types.txt 36
-rw-r--r-- notes/schema/work_release_types.txt 103
4 files changed, 196 insertions, 43 deletions
diff --git a/notes/schema/alignments.csv b/notes/schema/alignments.csv
index b8619ddc..294cf969 100644
--- a/notes/schema/alignments.csv
+++ b/notes/schema/alignments.csv
@@ -1,35 +1,38 @@
fatcat entity,fatcat field,description,Crossref,ORCID,Bibtex,CSL,BIBFRAME,Dublin Core (DCMES),Dublin Core (DCMI),oai_dc (OAI-PMH),resourceSync,Highwire Press (google scholar),MEDLINE (?)
WORK,,,,,,,Work,,,,,,
-,work_type,,,,,,,Type,,,,,
RELEASE,,,,,,,Instance,,,,,,
-,title,,title,,title,,,Title,,title,,citation_title,
-,release_type,,type (???),,object type; howpublished,,,Type,,,,,
-,release_status,"NEW; eg, preprint, final",,,,,,,,,,,
+,title,,title,,title,title,,Title,,title,,citation_title,
+,release_type,use CSL types; verbatim?,type (???),,object type; howpublished,“type” vocabulary,,Type,,,,,
+,release_status,"eg, preprint, final",,,,,,,,,,,
,container,,ISSN (via lookup),,ISSN (via lookup),,,Relation (?),,,,citation_issn (normalized),
-,release_date,RENAME,published-print or published-online,,"year, month",issued,,Date,Issued (?),,,citation_publication_date,
-,volume,,volume,,volume,,,,,,,citation_volume,
-,pages,,page,,pages,,,,,,,"citation_firstpage, citation_lastpage",
-,issue,,issue,,,,,,,,,citation_issue,
+,release_date,,published-print or published-online,,"year, month",issued or original-date,,Date,Issued (?),,,citation_publication_date,
+,volume,,volume,,volume,volume (number),,,,,,citation_volume,
+,pages,,page,,pages,page,,,,,,"citation_firstpage, citation_lastpage",
+,issue,,issue,,,issue (number),,,,,,citation_issue,
,doi,always lower-case,DOI,,doi,DOI,,Identifier,,,,citation_doi,
,isbn13,,ISBN (converted),,,ISBN,,Identifier,,,,citation_isbn,
-,publisher,,publisher,,publisher,,,Publisher,,,,"citation_dissertation_institution, citation_technical_report_institution",
-,language,NEW; RFC1766 (ISO 639-1 superset); may transition,,,,,,Language,,,,,
+,core_id,,,,,,,,,,,,
+,pmid,,,,,PMID,,,,,,,
+,pmcid,,,,,PMCID,,,,,,,
+,wikidata_qid,,,,,,,,,,,,
+,arxiv_id,TODO?,,,,,,,,,,,
+,publisher,,publisher,,publisher,publisher,,Publisher,,,,"citation_dissertation_institution, citation_technical_report_institution",
+,language,RFC1766 (ISO 639-1 superset); may transition,,,,,,Language,,,,,
,ref:index,,reference:[index],,,citation-number,,,,,,,
-,ref:key,NEW,reference:key,,,citation-label,,,,,,,
-,ref:raw,RENAME,reference:unstructured,,,,,,,,,,
-,ref:container_title,NEW,reference:journal-title,,,,,,,,,,
-,ref:year,NEW,reference:year,,,,,,,,,,
-,ref:title,NEW,,,,,,,,,,,
-,ref:locator,"NEW; (aka, page number)",,,,,,,,,,,
+,ref:key,,reference:key,,,citation-label,,,,,,,
+,ref:raw,,reference:unstructured,,,,,,,,,,
+,ref:container_title,,reference:journal-title,,,,,,,,,,
+,ref:year,,reference:year,,,,,,,,,,
+,ref:title,,,,,,,,,,,,
+,ref:locator,"aka, page number",,,,,,,,,,,
,contrib:role,,,,,,Role,,,,,,
,contrib:index,,author[index],,,,,,,,,,
-,contrib:raw,RENAME,Author:{given+family},,author,,,,,,,,
+,contrib:raw,,Author:{given+family},,author,,,,,,,,
,extra:number,tech reports,,,,,,,,,,citation_technical_report_number,
,extra:institution,for tech reports and dissertations,,,,,,,,,,,
-,extra:abstract,,,,,,,Description,,,,citation_abstract,
-,extra:pmcid,,,,,,,,,,,,
-,extra:pmid,,,,,,,,,,,,
,extra:version,"eg, for software",,,,,,,,,,,
+,extra:archives,???,,,,archive,,,,,,,
+,extra:genre,Sub-category of type; eg master's vs. PhD thesis,,,,genre,,,,,,,
,,,,,,,,,,,,,
CREATOR,,,,,,,Agent,Creator / Contributor,,,,,
,display_name,NEW; usually western/latinized ,,.name.credit-name.value (?),,,,,,,,citation_author,
@@ -55,3 +58,39 @@ FILE,,,,,,,Item,,,,,,
,md5,,,,,,,,,,hash,,
,url,,,,www_pdf_url,,,,,,href,citation_pdf_url,
,mimetype,NEW,,,,,,Format,,,type,,
+RELEASE_TYPE,,,,,,,,,,,,,
+,article,if type isn’t known. Also for pre-prints?,,,,,,,,,,,
+,article-magazine,* can be digital,,,,,,,,,,,
+,article-newspaper,* can be digital,,,,,,,,,,,
+,article-journal,*,,,,,,,,,,,
+,bill,,,,,,,,,,,,
+,book,*,,,,,,,,,,,
+,broadcast,,,,,,,,,,,,
+,chapter,*,,,,,,,,,,,
+,dataset,*,,,,,,,,,,,
+,entry,,,,,,,,,,,,
+,entry-dictionary,,,,,,,,,,,,
+,entry-encyclopedia,,,,,,,,,,,,
+,figure,,,,,,,,,,,,
+,graphic,,,,,,,,,,,,
+,interview,,,,,,,,,,,,
+,legislation,,,,,,,,,,,,
+,legal_case,,,,,,,,,,,,
+,manuscript,*,,,,,,,,,,,
+,map,,,,,,,,,,,,
+,motion_picture,,,,,,,,,,,,
+,musical_score,,,,,,,,,,,,
+,pamphlet,,,,,,,,,,,,
+,paper-conference,*,,,,,,,,,,,
+,patent,,,,,,,,,,,,
+,post,"web/social media (eg, twitter)",,,,,,,,,,,
+,post-weblog,*,,,,,,,,,,,
+,personal_communication,,,,,,,,,,,,
+,report,*,,,,,,,,,,,
+,review,*,,,,,,,,,,,
+,review-book,*,,,,,,,,,,,
+,song,,,,,,,,,,,,
+,speech,*,,,,,,,,,,,
+,thesis,*,,,,,,,,,,,
+,treaty,,,,,,,,,,,,
+,webpage,*,,,,,,,,,,,
diff --git a/notes/schema/alignments.txt b/notes/schema/alignments.txt
index e2736268..e7678d93 100644
--- a/notes/schema/alignments.txt
+++ b/notes/schema/alignments.txt
@@ -20,3 +20,24 @@ Quick descriptions of the "original 15" fields: <http://mn.gov/bridges/dcore.htm
Human-readable specification: <http://docs.citationstyles.org/en/1.0.1/specification.html>
Specifically, the "variables" and type definitions: <http://docs.citationstyles.org/en/stable/specification.html#appendix-iv-variables>
+
+"extra" fields:
+- medium (CD, DVD; from CSL)
+- genre (PhD vs. master's thesis? from CSL)
+- rights/license (for explicit OA)
+- version (eg, for software, standards)
+- url (eg, for blog posts and other web content; canonical only)
+
+other things:
+- align cite-items even closer with CSL? assuming this is what crossref is doing
+- anything specially needed for a blog post? url (original/canonical)?
+- press_release
+
+more serious schema issues:
+- add arxiv id (for easier aggressive import)
+- two levels of container? something for both "series" and "specific year of
+ conference". nested seems to get out of hand. for now, just a
+ "series-container" string in extra?
+- for chapter/book relations, a `part_of` field for release_rev to release_ident
+- ok, now I understand the whole "date-parts" business. hrm, maybe need to have
+ 3 columns or use a string format
diff --git a/notes/schema/contrib_types.txt b/notes/schema/contrib_types.txt
index 01024b40..6070240b 100644
--- a/notes/schema/contrib_types.txt
+++ b/notes/schema/contrib_types.txt
@@ -1,14 +1,30 @@
-See also: <http://docs.citationstyles.org/en/stable/specification.html#roles>
+fatcat should probably allow all roles, but only emphasize:
+- author
+- editor
+- translator
+- illustrator
-author
-editor
-translator
-illustrator
-interviewer
-reviewer
+possible additional (non-CSL) roles:
+- chair
+Crossref has the additional "chair" contributor_role: '"chair" should only be
+used for conference proceedings to indicate a conference chair'.
-container-author
-container-editor
-reviewed-author
+All CSL Roles:
+- author
+- collection-editor
+- composer
+- container-author
+- director
+- editor
+- editorial-director
+- editortranslator
+- illustrator
+- interviewer
+- original-author
+- recipient
+- reviewed-author
+- translator
+
+From: <http://docs.citationstyles.org/en/stable/specification.html#roles>
diff --git a/notes/schema/work_release_types.txt b/notes/schema/work_release_types.txt
index 6eff118b..150a7ae3 100644
--- a/notes/schema/work_release_types.txt
+++ b/notes/schema/work_release_types.txt
@@ -1,28 +1,105 @@
-see also: <http://docs.citationstyles.org/en/stable/specification.html#appendix-iii-types>
+see also:
+- <http://docs.citationstyles.org/en/stable/specification.html#appendix-iii-types>
+- <https://citeproc-js.readthedocs.io/en/latest/csl-m/index.html#unpublished-extension>
+
+"publication status" instead of pre-print and manuscript types
+
+all CSL types (note: can also be null):
+- article
+- article-magazine
+- article-newspaper
+- article-journal
+- bill
+- book
+- broadcast
+- chapter
+- dataset
+- entry
+- entry-dictionary
+- entry-encyclopedia
+- figure
+- graphic
+- interview
+- legislation
+- legal_case
+- manuscript
+- map
+- motion_picture
+- musical_score
+- pamphlet
+- paper-conference
+- patent
+- post
+- post-weblog
+- personal_communication
+- report
+- review
+- review-book
+- song
+- speech
+- thesis
+- treaty
+- webpage
basics:
- article-journal
+- article-magazine
+- article-newspaper
- book
- chapter
- dataset
- manuscript
- paper-conference
-- blog-post
+- post-weblog
- report
+- review
+- speech
- thesis (aka, dissertation)
+- webpage
+
+additional fatcat types:
+- software (https://github.com/citation-style-language/zotero-bits/issues/69)
+- peer_review
+- standard (as in, RFC or ISO)
+
+"paper-like" (for various stats):
+- article-journal
+- chapter
+- paper-conference
+- thesis
-- preprint
-- conference proceeding
-- book chapter
-- technical report/memo
-- dissertation/thesis
-- blog post
-- wiki page
-- presentation (slides, recording)
+crossref work types (https://api.crossref.org/types):
+- book => book
+- book-chapter => chapter
+- book-part => chapter
+- book-section => chapter
+- component => <blank> (or more specific)
+- dataset => dataset
+- dissertation => thesis
+- edited-book => book
+- journal-article => article-journal
+- monograph => book (CSL has no "monograph" type)
+- other => <blank>
+- peer-review => peer_review (or blank)
+- posted-content => post (or more specific)
+- proceedings-article => paper-conference
+- reference-book => book
+- reference-entry => entry (or more specific)
+- report => report
+- standard => ???
-"publication status" instead of pre-print and manuscript types?
-abstracts as files?
+crossref non-work types (don't need mappings; don't import? as containers?
+ two layers of containers?)
+- book-series
+- book-set
+- book-track
+- journal (definitely not release)
+- journal-issue
+- journal-volume
+- proceedings (definitely not release)
+- proceedings-series
+- report-series
+- standard-series
-videos, slides