summaryrefslogtreecommitdiffstats
path: root/prec.scm
diff options
context:
space:
mode:
Diffstat (limited to 'prec.scm')
-rw-r--r--prec.scm438
1 files changed, 438 insertions, 0 deletions
diff --git a/prec.scm b/prec.scm
new file mode 100644
index 0000000..bb66763
--- /dev/null
+++ b/prec.scm
@@ -0,0 +1,438 @@
+; "prec.scm", dynamically extensible parser/tokenizer -*-scheme-*-
+; Copyright 1989, 1990, 1991, 1992, 1993, 1995, 1997 Aubrey Jaffer.
+;
+;Permission to copy this software, to redistribute it, and to use it
+;for any purpose is granted, subject to the following restrictions and
+;understandings.
+;
+;1. Any copy made of this software must include this copyright notice
+;in full.
+;
+;2. I have made no warrantee or representation that the operation of
+;this software will be error-free, and I am under no obligation to
+;provide any services, by way of maintenance, update, or otherwise.
+;
+;3. In conjunction with products arising from the use of this
+;material, there shall be no use of my name in any advertising,
+;promotional, or sales literature without prior written consent in
+;each case.
+
+; This file implements:
+; * a Pratt style parser.
+; * a tokenizer which congeals tokens according to assigned classes of
+; constituent characters.
+;
+; This module is a significant improvement because grammar can be
+; changed dynamically from rulesets which don't need compilation.
+; Theoretically, all possibilities of bad input are handled and return
+; as much structure as was parsed when the error occured; The symbol
+; `?' is substituted for missing input.
+
+; References for the parser are:
+
+; Pratt, V. R.
+; Top Down Operator Precendence.
+; SIGACT/SIGPLAN
+; Symposium on Principles of Programming Languages,
+; Boston, 1973, 41-51
+
+; WORKING PAPER 121
+; CGOL - an Alternative External Representation For LISP users
+; Vaughan R. Pratt
+; MIT Artificial Intelligence Lab.
+; March 1976
+
+; Mathlab Group,
+; MACSYMA Reference Manual, Version Ten,
+; Laboratory for Computer Science, MIT, 1983
+
+(require 'fluid-let)
+(require 'string-search)
+(require 'string-port)
+(require 'delay)
+
+(define *syn-defs* #f)
+(define *syn-rules* #f) ;Dynamically bound
+(define *prec:port* #f) ;Dynamically bound
+
+;; keeps track of input column so we can generate useful error displays.
+(define tok:column 0)
+(define (tok:peek-char) (peek-char *prec:port*))
+(define (tok:read-char)
+ (let ((c (read-char *prec:port*)))
+ (if (or (eqv? c #\newline) (eof-object? c))
+ (set! tok:column 0)
+ (set! tok:column (+ 1 tok:column)))
+ c))
+(define (tok:bump-column pos . ports)
+ ((lambda (thunk)
+ (cond ((null? ports) (thunk))
+ (else (fluid-let ((*prec:port* (car ports))) (thunk)))))
+ (lambda ()
+ (cond ((eqv? #\newline (tok:peek-char))
+ (tok:read-char))) ;to do newline
+ (set! tok:column (+ tok:column pos)))))
+(define (prec:warn msg)
+ (do ((j (+ -1 tok:column) (+ -8 j)))
+ ((> 8 j)
+ (do ((i j (+ -1 i)))
+ ((>= 0 i))
+ (display #\ )))
+ (display slib:tab))
+ (display "^ ")
+ (display msg)
+ (newline))
+
+;; Structure of lexical records.
+(define tok:make-rec cons)
+(define tok:cc car)
+(define tok:sfp cdr)
+
+(define (tok:lookup alist char)
+ (if (eof-object? char)
+ #f
+ (let ((pair (assv char alist)))
+ (and pair (cdr pair)))))
+
+(define (tok:char-group group chars chars-proc)
+ (map (lambda (token)
+;;; (let ((oldlexrec (tok:lookup *syn-defs* token)))
+;;; (cond ((or (not oldlexrec) (eqv? (tok:cc oldlexrec) group)))
+;;; (else (math:warn 'cc-of token 'redefined-to- group))))
+ (cons token (tok:make-rec group chars-proc)))
+ (cond ((string? chars) (string->list chars))
+ ((char? chars) (list chars))
+ (else chars))))
+
+(define (tokenize)
+ (let* ((char (tok:read-char))
+ (rec (tok:lookup *syn-rules* char))
+ (proc (and rec (tok:cc rec)))
+ (clist (list char)))
+ (cond
+ ((not proc) char)
+ ((procedure? proc)
+ (do ((cl clist (begin (set-cdr! cl (list (tok:read-char))) (cdr cl))))
+ ((proc (tok:peek-char))
+ ((or (tok:sfp rec) list->string) clist))))
+ ((eqv? 0 proc) (tokenize))
+ (else
+ (do ((cl clist (begin (set-cdr! cl (list (tok:read-char))) (cdr cl))))
+ ((not (let* ((prec (tok:lookup *syn-rules* (tok:peek-char)))
+ (cclass (and prec (tok:cc prec))))
+ (or (eqv? cclass proc)
+ (eqv? cclass (+ -1 proc)))))
+ ((tok:sfp rec) clist)))))))
+
+;;; PREC:NUD is the null denotation (function and arguments to call when no
+;;; unclaimed tokens).
+;;; PREC:LED is the left denotation (function and arguments to call when
+;;; unclaimed token is on left).
+;;; PREC:LBP is the left binding power of this LED. It is the first
+;;; argument position of PREC:LED
+
+(define (prec:nudf alist self)
+ (let ((pair (assoc (cons 'nud self) alist)))
+ (and pair (cdr pair))))
+(define (prec:ledf alist self)
+ (let ((pair (assoc (cons 'led self) alist)))
+ (and pair (cdr pair))))
+(define (prec:lbp alist self)
+ (let ((pair (assoc (cons 'led self) alist)))
+ (and pair (cadr pair))))
+
+(define (prec:call-or-list proc . args)
+ (prec:apply-or-cons proc args))
+(define (prec:apply-or-cons proc args)
+ (if (procedure? proc) (apply proc args) (cons (or proc '?) args)))
+
+;;; PREC:SYMBOLFY and PREC:DE-SYMBOLFY are not exact inverses.
+(define (prec:symbolfy obj)
+ (cond ((symbol? obj) obj)
+ ((string? obj) (string->symbol obj))
+ ((char? obj) (string->symbol (string obj)))
+ (else obj)))
+
+(define (prec:de-symbolfy obj)
+ (cond ((symbol? obj) (symbol->string obj))
+ (else obj)))
+
+;;;Calls to set up tables.
+
+(define (prec:define-grammar . synlsts)
+ (set! *syn-defs* (append (apply append synlsts) *syn-defs*)))
+
+(define (prec:make-led toks . args)
+ (map (lambda (tok)
+ (cons (cons 'led (prec:de-symbolfy tok))
+ args))
+ (if (pair? toks) toks (list toks))))
+(define (prec:make-nud toks . args)
+ (map (lambda (tok)
+ (cons (cons 'nud (prec:de-symbolfy tok))
+ args))
+ (if (pair? toks) toks (list toks))))
+
+;;; Produce dynamically augmented grammars.
+(define (prec:process-binds binds rules)
+ (if (and #f (not (null? binds)) (eq? #t (car binds)))
+ (cdr binds)
+ (append binds rules)))
+
+;;(define (prec:replace-rules) some-sort-of-magic-cookie)
+
+;;; Here are the procedures to define high-level grammar, along with
+;;; utility functions called during parsing. The utility functions
+;;; (prec:parse-*) could be incorportated into the defining commands,
+;;; but tracing these functions is useful for debugging.
+
+(define (prec:delim tk)
+ (prec:make-led tk 0 #f))
+
+(define (prec:nofix tk sop)
+ (prec:make-nud tk prec:parse-nofix sop))
+(define (prec:parse-nofix self sop)
+ (prec:call-or-list (or sop (prec:symbolfy self))))
+
+(define (prec:prefix tk sop bp . binds)
+ (prec:make-nud tk prec:parse-prefix sop bp (apply append binds)))
+(define (prec:parse-prefix self sop bp binds)
+ (fluid-let ((*syn-rules* (prec:process-binds binds *syn-rules*)))
+ (prec:call-or-list (or sop (prec:symbolfy self)) (prec:parse1 bp))))
+
+(define (prec:infix tk sop lbp bp . binds)
+ (prec:make-led tk lbp prec:parse-infix sop bp (apply append binds)))
+(define (prec:parse-infix left self lbp sop bp binds)
+ (fluid-let ((*syn-rules* (prec:process-binds binds *syn-rules*)))
+ (prec:call-or-list (or sop (prec:symbolfy self)) left (prec:parse1 bp))))
+
+(define (prec:nary tk sop bp)
+ (prec:make-led tk bp prec:parse-nary sop bp))
+(define (prec:parse-nary left self lbp sop bp)
+ (prec:apply-or-cons (or sop (prec:symbolfy self))
+ (cons left (prec:parse-list self bp))))
+
+(define (prec:postfix tk sop lbp)
+ (prec:make-led tk lbp prec:parse-postfix sop))
+(define (prec:parse-postfix left self lbp sop)
+ (prec:call-or-list (or sop (prec:symbolfy self)) left))
+
+(define (prec:prestfix tk sop bp . binds)
+ (prec:make-nud tk prec:parse-rest sop bp (apply append binds)))
+(define (prec:parse-rest self sop bp binds)
+ (fluid-let ((*syn-rules* (prec:process-binds binds *syn-rules*)))
+ (prec:apply-or-cons (or sop (prec:symbolfy self)) (prec:parse-list #f bp))))
+
+(define (prec:commentfix tk stp match . binds)
+ (append
+ (prec:make-nud tk prec:parse-nudcomment stp match (apply append binds))
+ (prec:make-led tk 220 prec:parse-ledcomment stp match (apply append binds))))
+(define (prec:parse-nudcomment self stp match binds)
+ (fluid-let ((*syn-rules* (prec:process-binds binds *syn-rules*)))
+ (tok:read-through-comment stp match)
+ (prec:advance)
+ (cond ((prec:delim? (force prec:token)) #f)
+ (else (prec:parse1 prec:bp)))))
+(define (prec:parse-ledcomment left lbp self stp match binds)
+ (fluid-let ((*syn-rules* (prec:process-binds binds *syn-rules*)))
+ (tok:read-through-comment stp match)
+ (prec:advance)
+ left))
+(define (tok:read-through-comment stp match)
+ (set! match (if (char? match)
+ (string match)
+ (prec:de-symbolfy match)))
+ (cond ((procedure? stp)
+ (let* ((len #f)
+ (str (call-with-output-string
+ (lambda (sp)
+ (set! len (find-string-from-port?
+ match *prec:port*
+ (lambda (c) (display c sp) #f)))))))
+ (stp (and len (substring str 0 (- len (string-length match)))))))
+ (else (find-string-from-port? match *prec:port*))))
+
+(define (prec:matchfix tk sop sep match . binds)
+ (define sep-lbp 0)
+ (prec:make-nud tk prec:parse-matchfix
+ sop sep-lbp sep match
+ (apply append (prec:delim match) binds)))
+(define (prec:parse-matchfix self sop sep-lbp sep match binds)
+ (fluid-let ((*syn-rules* (prec:process-binds binds *syn-rules*)))
+ (cond (sop (prec:apply-or-cons
+ sop (prec:parse-delimited sep sep-lbp match)))
+ ((equal? (force prec:token) match)
+ (prec:warn 'expression-missing)
+ (prec:advance)
+ '?)
+ (else (let ((ans (prec:parse1 0))) ;just parenthesized expression
+ (cond ((equal? (force prec:token) match)
+ (prec:advance))
+ ((prec:delim? (force prec:token))
+ (prec:warn 'mismatched-delimiter)
+ (prec:advance))
+ (else (prec:warn 'delimiter-expected--ignoring-rest)
+ (do () ((prec:delim? (force prec:token)))
+ (prec:parse1 0))))
+ ans)))))
+
+(define (prec:inmatchfix tk sop sep match lbp . binds)
+ (define sep-lbp 0)
+ (prec:make-led tk lbp prec:parse-inmatchfix
+ sop sep-lbp sep match
+ (apply append (prec:delim match) binds)))
+(define (prec:parse-inmatchfix left self lbp sop sep-lbp sep match binds)
+ (fluid-let ((*syn-rules* (prec:process-binds binds *syn-rules*)))
+ (prec:apply-or-cons
+ sop (cons left (prec:parse-delimited sep sep-lbp match)))))
+
+;;;; Here is the code which actually parses.
+
+(define prec:bp #f) ;dynamically bound
+(define prec:token #f)
+(define (prec:advance)
+ (set! prec:token (delay (tokenize))))
+(define (prec:advance-return-last)
+ (let ((last (and prec:token (force prec:token))))
+ (prec:advance)
+ last))
+
+(define (prec:nudcall self)
+ (let ((pob (prec:nudf *syn-rules* self)))
+ (cond
+ (pob (let ((proc (car pob)))
+ (cond ((procedure? proc) (apply proc self (cdr pob)))
+ (proc (cons proc (cdr pob)))
+ (else '?))))
+ ((char? self) (prec:warn 'extra-separator)
+ (prec:advance)
+ (prec:nudcall (force prec:token)))
+ ((string? self) (string->symbol self))
+ (else self))))
+
+(define (prec:ledcall left self)
+ (let* ((pob (prec:ledf *syn-rules* self)))
+ (apply (cadr pob) left self (cdr pob))))
+
+;;; PREC:PARSE1 is the heart.
+(define (prec:parse1 bp)
+ (fluid-let ((prec:bp bp))
+ (do ((left (prec:nudcall (prec:advance-return-last))
+ (prec:ledcall left (prec:advance-return-last))))
+ ((or (>= bp 200) ;to avoid unneccesary lookahead
+ (>= bp (or (prec:lbp *syn-rules* (force prec:token)) 0))
+ (not left))
+ left))))
+
+(define (prec:delim? token)
+ (or (eof-object? token) (<= (or (prec:lbp *syn-rules* token) 220) 0)))
+
+(define (prec:parse-list sep bp)
+ (cond ((prec:delim? (force prec:token))
+ (prec:warn 'expression-missing)
+ '(?))
+ (else
+ (let ((f (prec:parse1 bp)))
+ (cons f (cond ((equal? (force prec:token) sep)
+ (prec:advance)
+ (cond ((equal? (force prec:token) sep)
+ (prec:warn 'expression-missing)
+ (prec:advance)
+ (cons '? (prec:parse-list sep bp)))
+ ((prec:delim? (force prec:token))
+ (prec:warn 'expression-missing)
+ '(?))
+ (else (prec:parse-list sep bp))))
+ ((prec:delim? (force prec:token)) '())
+ ((not sep) (prec:parse-list sep bp))
+ ((prec:delim? sep) (prec:warn 'separator-missing)
+ (prec:parse-list sep bp))
+ (else '())))))))
+
+(define (prec:parse-delimited sep bp delim)
+ (cond ((equal? (force prec:token) sep)
+ (prec:warn 'expression-missing)
+ (prec:advance)
+ (cons '? (prec:parse-delimited sep delim)))
+ ((prec:delim? (force prec:token))
+ (if (not (equal? (force prec:token) delim))
+ (prec:warn 'mismatched-delimiter))
+ (if (not sep) (prec:warn 'expression-missing))
+ (prec:advance)
+ (if sep '() '(?)))
+ (else (let ((ans (prec:parse-list sep bp)))
+ (cond ((equal? (force prec:token) delim))
+ ((prec:delim? (force prec:token))
+ (prec:warn 'mismatched-delimiter))
+ (else (prec:warn 'delimiter-expected--ignoring-rest)
+ (do () ((prec:delim? (force prec:token)))
+ (prec:parse1 bp))))
+ (prec:advance)
+ ans))))
+
+(define (prec:parse grammar delim . port)
+ (set! delim (prec:de-symbolfy delim))
+ (fluid-let ((*syn-rules* (append (prec:delim delim) grammar))
+ (*prec:port* (if (null? port) (current-input-port) (car port))))
+ (prec:advance) ; setup prec:token with first token
+ (cond ((eof-object? (force prec:token)) (force prec:token))
+ ((equal? (force prec:token) delim) #f)
+ (else
+ (let ((ans (prec:parse1 0)))
+ (cond ((eof-object? (force prec:token)))
+ ((equal? (force prec:token) delim))
+ (else (prec:warn 'delimiter-expected--ignoring-rest)
+ (do () ((or (equal? (force prec:token) delim)
+ (eof-object? (force prec:token))))
+ (prec:advance))))
+ ans)))))
+
+(define tok:decimal-digits "0123456789")
+(define tok:upper-case "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+(define tok:lower-case "abcdefghijklmnopqrstuvwxyz")
+(define tok:whitespaces
+ (do ((i (+ -1 (min 256 char-code-limit)) (+ -1 i))
+ (ws "" (if (char-whitespace? (integer->char i))
+ (string-append ws (string (integer->char i)))
+ ws)))
+ ((negative? i) ws)))
+
+;;;;The parse tables.
+;;; Definitions accumulate in top-level variable *SYN-DEFS*.
+(set! *syn-defs* '()) ;Make sure *SYN-DEFS* is empty.
+
+;;; Ignore Whitespace characters.
+(prec:define-grammar (tok:char-group 0 tok:whitespaces #f))
+
+;;; On MS-DOS systems, <ctrl>-Z (26) needs to be ignored in order to
+;;; avoid problems at end of files.
+(case (software-type)
+ ((MSDOS)
+ (if (not (char-whitespace? (integer->char 26)))
+ (prec:define-grammar (tok:char-group 0 (integer->char 26) #f))
+ )))
+
+;;; Save these convenient definitions.
+(define *syn-ignore-whitespace* *syn-defs*)
+(set! *syn-defs* '())
+
+(define (prec:trace)
+ (require 'trace)
+ (trace prec:parse prec:parse1
+ prec:parse-delimited prec:parse-list
+ prec:call-or-list prec:apply-or-cons
+ ;;tokenize prec:advance-return-last prec:advance
+ prec:nudcall prec:ledcall
+ prec:parse-nudcomment prec:parse-ledcomment
+ prec:parse-delimited prec:parse-list
+ prec:parse-nary prec:parse-rest
+ prec:parse-matchfix prec:parse-inmatchfix
+ prec:parse-prefix prec:parse-infix prec:parse-postfix
+ ;;prec:delim?
+ ;;prec:ledf prec:nudf prec:lbp
+ )
+ (set! *qp-width* 333))
+
+;;(begin (trace-all "prec.scm") (set! *qp-width* 333))
+;;(pretty-print (grammar-read-tab (get-grammar 'standard)))
+;;(prec:trace)