From 6bcd62005dd7eab94744f5f368d4724732bcfbd9 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 14 Nov 2018 18:36:33 -0800 Subject: bunch of notes on CSL alignment and types --- notes/schema/alignments.csv | 79 ++++++++++++++++++++------- notes/schema/alignments.txt | 21 ++++++++ notes/schema/contrib_types.txt | 36 +++++++++---- notes/schema/work_release_types.txt | 103 +++++++++++++++++++++++++++++++----- 4 files changed, 196 insertions(+), 43 deletions(-) diff --git a/notes/schema/alignments.csv b/notes/schema/alignments.csv index b8619ddc..294cf969 100644 --- a/notes/schema/alignments.csv +++ b/notes/schema/alignments.csv @@ -1,35 +1,38 @@ fatcat entity,fatcat field,description,Crossref,ORCID,Bibtex,CSL,BIBFRAME,Dublin Core (DCMES),Dublin Core (DCMI),oai_dc (OAI-PMH),resourceSync,Highwire Press (google scholar),MEDLINE (?) WORK,,,,,,,Work,,,,,, -,work_type,,,,,,,Type,,,,, RELEASE,,,,,,,Instance,,,,,, -,title,,title,,title,,,Title,,title,,citation_title, -,release_type,,type (???),,object type; howpublished,,,Type,,,,, -,release_status,"NEW; eg, preprint, final",,,,,,,,,,, +,title,,title,,title,title,,Title,,title,,citation_title, +,release_type,use CSL types; verbatim?,type (???),,object type; howpublished,“type” vocabulary,,Type,,,,, +,release_status,"eg, preprint, final",,,,,,,,,,, ,container,,ISSN (via lookup),,ISSN (via lookup),,,Relation (?),,,,citation_issn (normalized), -,release_date,RENAME,published-print or published-online,,"year, month",issued,,Date,Issued (?),,,citation_publication_date, -,volume,,volume,,volume,,,,,,,citation_volume, -,pages,,page,,pages,,,,,,,"citation_firstpage, citation_lastpage", -,issue,,issue,,,,,,,,,citation_issue, +,release_date,,published-print or published-online,,"year, month",issued or original-date,,Date,Issued (?),,,citation_publication_date, +,volume,,volume,,volume,volume (number),,,,,,citation_volume, +,pages,,page,,pages,page,,,,,,"citation_firstpage, citation_lastpage", +,issue,,issue,,,issue (number),,,,,,citation_issue, ,doi,always lower-case,DOI,,doi,DOI,,Identifier,,,,citation_doi, ,isbn13,,ISBN (converted),,,ISBN,,Identifier,,,,citation_isbn, -,publisher,,publisher,,publisher,,,Publisher,,,,"citation_dissertation_institution, citation_technical_report_institution", -,language,NEW; RFC1766 (ISO 639-1 superset); may transition,,,,,,Language,,,,, +,core_id,,,,,,,,,,,, +,pmid,,,,,PMID,,,,,,, +,pmcid,,,,,PMCID,,,,,,, +,wikidata_qid,,,,,,,,,,,, +,arxiv_id,TODO?,,,,,,,,,,, +,publisher,,publisher,,publisher,publisher,,Publisher,,,,"citation_dissertation_institution, citation_technical_report_institution", +,language,RFC1766 (ISO 639-1 superset); may transition,,,,,,Language,,,,, ,ref:index,,reference:[index],,,Citation-number,,,,,,, -,ref:key,NEW,reference:key,,,citation-label,,,,,,, -,ref:raw,RENAME,reference:unstructured,,,,,,,,,, -,ref:container_title,NEW,reference:journal-title,,,,,,,,,, -,ref:year,NEW,reference:year,,,,,,,,,, -,ref:title,NEW,,,,,,,,,,, -,ref:locator,"NEW; (aka, page number)",,,,,,,,,,, +,ref:key,,reference:key,,,citation-label,,,,,,, +,ref:raw,,reference:unstructured,,,,,,,,,, +,ref:container_title,,reference:journal-title,,,,,,,,,, +,ref:year,,reference:year,,,,,,,,,, +,ref:title,,,,,,,,,,,, +,ref:locator,"aka, page number",,,,,,,,,,, ,contrib:role,,,,,,Role,,,,,, ,contrib:index,,author[index],,,,,,,,,, -,contrib:raw,RENAME,Author:{given+family},,author,,,,,,,, +,contrib:raw,,Author:{given+family},,author,,,,,,,, ,extra:number,tech reports,,,,,,,,,,citation_technical_report_number, ,extra:institution,for tech reports and dissertations,,,,,,,,,,, -,extra:abstract,,,,,,,Description,,,,citation_abstract, -,extra:pmcid,,,,,,,,,,,, -,extra:pmid,,,,,,,,,,,, ,extra:version,"eg, for software",,,,,,,,,,, +,extra:archives,???,,,,archive,,,,,,, +,extra:genre,Sub-category of type; eg masters vs. phd thesis,,,,genre,,,,,,, ,,,,,,,,,,,,, CREATOR,,,,,,,Agent,Creator / Contributor,,,,, ,display_name,NEW; usually western/latinized ,,.name.credit-name.value (?),,,,,,,,citation_author, @@ -55,3 +58,39 @@ FILE,,,,,,,Item,,,,,, ,md5,,,,,,,,,,hash,, ,url,,,,www_pdf_url,,,,,,href,citation_pdf_url, ,mimetype,NEW,,,,,,Format,,,type,, +RELEASE_TYPE,,,,,,,,,,,,, +,article,if type isn’t known. Also for pre-prints?,,,,,,,,,,, +,article-magazine,* can be digital,,,,,,,,,,, +,article-newspaper,* can be digital,,,,,,,,,,, +,article-journal,*,,,,,,,,,,, +,bill,,,,,,,,,,,, +,book,*,,,,,,,,,,, +,broadcast,,,,,,,,,,,, +,chapter,*,,,,,,,,,,, +,dataset,*,,,,,,,,,,, +,entry,,,,,,,,,,,, +,entry-dictionary,,,,,,,,,,,, +,entry-encyclopedia,,,,,,,,,,,, +,figure,,,,,,,,,,,, +,graphic,,,,,,,,,,,, +,interview,,,,,,,,,,,, +,legislation,,,,,,,,,,,, +,legal_case,,,,,,,,,,,, +,manuscript,*,,,,,,,,,,, +,map,,,,,,,,,,,, +,motion_picture,,,,,,,,,,,, +,musical_score,,,,,,,,,,,, +,pamphlet,,,,,,,,,,,, +,paper-conference,*,,,,,,,,,,, +,patent,,,,,,,,,,,, +,post,"web/social media (eg, twitter)",,,,,,,,,,, +,post-weblog,*,,,,,,,,,,, +,personal_communication,,,,,,,,,,,, +,report,*,,,,,,,,,,, +,review,*,,,,,,,,,,, +,review-book,*,,,,,,,,,,, +,song,,,,,,,,,,,, +,speech,*,,,,,,,,,,, +,thesis,*,,,,,,,,,,, +,treaty,,,,,,,,,,,, +,webpage,*,,,,,,,,,,, diff --git a/notes/schema/alignments.txt b/notes/schema/alignments.txt index e2736268..e7678d93 100644 --- a/notes/schema/alignments.txt +++ b/notes/schema/alignments.txt @@ -20,3 +20,24 @@ Quick descriptions of the "original 15" fields: Specifically, the "variables" and type definitions: + +"extra" fields: +- medium (CD, DVD; from CSL) +- genre (Phd vs. masters thesis? from CSL) +- rights/license (for explicit OA) +- version (eg, for software, standards) +- url (eg, for blog posts and other web content; canonical only) + +other things: +- align cite-items even closer with CSL? assuming this is what crossref is doing +- anything specially needed for a blog post? url (original/canonical)? +- press_release + +more serious schema issues: +- add arxiv id (for easier aggressive import) +- two levels of container? something for both "series" and "specific year of + conference". nested seems to get out of hand. for now, just a + "series-container" string in extra? +- for chapter/book relations, a `part_of` field for release_rev to release_ident +- ok, now I understand the whole "date-parts" business. hrm, maybe need to have + 3 columns or use a string format diff --git a/notes/schema/contrib_types.txt b/notes/schema/contrib_types.txt index 01024b40..6070240b 100644 --- a/notes/schema/contrib_types.txt +++ b/notes/schema/contrib_types.txt @@ -1,14 +1,30 @@ -See also: +fatcat should probably allow all roles, but only emphasize: +- author +- editor +- translator +- illustrator -author -editor -translator -illustrator -interviewer -reviewer +possible additional (non-CSL) roles: +- chair +Crossref has the additional "chair" contributor_role: '"chair" should only be +used for conference proceedings to indicate a conference chair'. -container-author -container-editor -reviewed-author +All CSL Roles: +- author +- collection-editor +- composer +- container-author +- director +- editor +- editorial-director +- editortranslator +- illustrator +- interviewer +- original-author +- recipient +- reviewed-author +- translator + +From: diff --git a/notes/schema/work_release_types.txt b/notes/schema/work_release_types.txt index 6eff118b..150a7ae3 100644 --- a/notes/schema/work_release_types.txt +++ b/notes/schema/work_release_types.txt @@ -1,28 +1,105 @@ -see also: +see also: +- +- + +"publication status" instead of pre-print and manuscript types + +all CSL types (note: can also be null): +- article +- article-magazine +- article-newspaper +- article-journal +- bill +- book +- broadcast +- chapter +- dataset +- entry +- entry-dictionary +- entry-encyclopedia +- figure +- graphic +- interview +- legislation +- legal_case +- manuscript +- map +- motion_picture +- musical_score +- pamphlet +- paper-conference +- patent +- post +- post-weblog +- personal_communication +- report +- review +- review-book +- song +- speech +- thesis +- treaty +- webpage basics: - article-journal +- article-magazine +- article-newspaper - book - chapter - dataset - manuscript - paper-conference -- blog-post +- post-weblog - report +- review +- speech - thesis (aka, dissertation) +- webpage + +additional fatcat types: +- software (https://github.com/citation-style-language/zotero-bits/issues/69) +- peer_review +- standard (as in, RFC or ISO) + +"paper-like" (for various stats): +- article-journal +- chapter +- paper-conference +- thesis -- preprint -- conference proceeding -- book chapter -- technical report/memo -- dissertation/thesis -- blog post -- wiki page -- presentation (slides, recording) +crossref work types (https://api.crossref.org/types): +- book => book +- book-chapter => chapter +- book-part => chapter +- book-section => chapter +- component => (or more specific) +- dataset => dataset +- dissertation => thesis +- edited-book => book +- journal-article => article-journal +- monograph => monograph +- other => +- peer-review => peer_review (or blank) +- posted-content => post (or more specific) +- proceedings-article => paper-conference +- reference-book => book +- reference-entry => entry (or more specific) +- report => report +- standard => ??? -"publication status" instead of pre-print and manuscript types? -abstracts as files? +crossref non-work types (don't need mappings; don't import? as containers? + two-layers of containers?) +- book-series +- book-set +- book-track +- journal (definitely not release) +- journal-issue +- journal-volume +- proceedings (definitely not release) +- proceedings-series +- report-series +- standard-series -videos, slides -- cgit v1.2.3