;; BF Parser in Scheme ;; Jon Simons (simonsj at ccs dot neu dot edu) ;; August, 2007 ;; ------------------------------------------------------------------------- (module bf-parser mzscheme (require scheme/list) (require "bf-lex.scm") ;; TODO -- Later refine specific things that need to be exported (provide (all-defined)) ;; ------------------------------------------------ ;; BF Program AST with relative pointer annotations ;; ------------------------------------------------ ;; ;; This AST includes relative pointer information for the various tokens. ;; ;; n stands for a number ;; ;; ptr-relation is a number that describes the relative location of ;; the BF pointer (relative to the given operation) ;; ;; ::= ... ;; ;; ::= INC(n, ptr-relation) ;; | DEC(n, ptr-relation) ;; | LT(n) ;; | RT(n) ;; | OUT(ptr-relation) ;; | IN(ptr-relation) ;; | LOOP( ..., ptr-relation) ;; ;; For LOOPs, the ptr-relation is the relative BF pointer location at ;; the beginning of the loop body. ;; make-EXPR : symbol number number (list of EXPRs) -> EXPR (define-struct EXPR (type N ptr exprs)) ;; Shorthand constructors ;; ;; The convention followed is that _only_ these constructors ;; are allowed to be used. There shouldn't be any instances of ;; make-struct being used. ;; ;; TODO: To reduce memory footprint, it would be nice to memoize struct ;; instances so that there is never more than one struct with the ;; exact same information. This could well be a *drastic* ;; improvement for extreme BF programs (extreme being ~16 million ;; characters). (define (INC n ptr) (make-EXPR 'inc n ptr #f)) (define (DEC n ptr) (make-EXPR 'dec n ptr #f)) (define (RT n) (make-EXPR 'rt n #f #f)) (define (LT n) (make-EXPR 'lt n #f #f)) (define (OUT ptr) (make-EXPR 'out #f ptr #f)) (define (IN ptr) (make-EXPR 'in #f ptr #f)) (define (LOOP exprs ptr) (make-EXPR 'loop #f ptr exprs)) ;; Compares two given EXPRs for extensional equality ;; ;; EXPR-equal? : EXPR EXPR -> boolean (define (EXPR-equal? e1 e2) (and (EXPR? e1) (EXPR? e2) (eq? (EXPR-type e1) (EXPR-type e2)) (case (EXPR-type e1) ((inc) (and (= (EXPR-N e1) (EXPR-N e2)) (= (EXPR-ptr e1) (EXPR-ptr e2)))) ((dec) (and (= (EXPR-N e1) (EXPR-N e2)) (= (EXPR-ptr e1) (EXPR-ptr e2)))) ((rt) (= (EXPR-N e1) (EXPR-N e2))) ((lt) (= (EXPR-N e1) (EXPR-N e2))) ((out) (= (EXPR-ptr e1) (EXPR-ptr e2))) ((in) (= (EXPR-ptr e1) (EXPR-ptr e2))) ((loop) (and (= (EXPR-ptr e1) (EXPR-ptr e2)) (andmap EXPR-equal? (EXPR-exprs e1) (EXPR-exprs e2)))) (else (printf "ERROR: unknown EXPR type '~s'~%" (EXPR-type e1)))))) ;; ------------------------------------------ "Parsing" ---- ;; Returns (car tokens) or #f if tokens is empty ;; ;; peek : (list of characters) -> character or #f (define (peek tokens) (if (null? tokens) #f (car tokens))) ;; Recursive descent parser for the BF program grammar. ;; ;; Note that there is one sort-of optimization here; contiguous memory ;; modifiers are collapsed into single tokens. ;; Memory modifiers that would cancel each other out are also taken into ;; account (eg "++-" --> "+"). ;; ;; parse : (list of characters) -> (list of EXPRs) (define (parse token-list) (letrec ((parse (lambda (tokens ptr-loc acc) (case (peek tokens) ;; Kludgily passing ptr-loc back as head item ((#f) (cons ptr-loc acc)) ((#\.) (parse (cdr tokens) ptr-loc (cons (OUT ptr-loc) acc))) ((#\,) (parse (cdr tokens) ptr-loc (cons (IN ptr-loc) acc))) ((#\+) (inc (cdr tokens) 1 ptr-loc acc)) ((#\-) (dec (cdr tokens) 1 ptr-loc acc)) ((#\<) (parse (cdr tokens) (sub1 ptr-loc) acc)) ((#\>) (parse (cdr tokens) (add1 ptr-loc) acc)) ((#\[) (loop (cdr tokens) ptr-loc acc)) ;; Kludgily passing ptr-loc back as head item ((#\]) (cons ptr-loc acc)) (else (printf "ERROR: unknown token '~a'~%" (car tokens)))))) (inc (lambda (tokens N ptr-loc acc) (case (peek tokens) ((#\+) (inc (cdr tokens) (add1 N) ptr-loc acc)) ((#\-) (let ((new-N (sub1 N))) (if (zero? new-N) (parse (cdr tokens) ptr-loc acc) (inc (cdr tokens) new-N ptr-loc acc)))) (else (parse tokens ptr-loc (cons (INC N ptr-loc) acc)))))) (dec (lambda (tokens N ptr-loc acc) (case (peek tokens) ((#\-) (dec (cdr tokens) (add1 N) ptr-loc acc)) ((#\+) (let ((new-N (add1 N))) (if (zero? new-N) (parse (cdr tokens) ptr-loc acc) (dec (cdr tokens) new-N ptr-loc acc)))) (else (parse tokens ptr-loc (cons (DEC N ptr-loc) acc)))))) ;; TODO -- This accumulates stack space linear to the number of ;; loops within the current loop's body... (loop (lambda (tokens ptr-loc acc) (let* ((body (parse tokens ptr-loc ())) ;; Kludge: We know (car body) is the ptr-loc of the ;; last expression in the loop body (ptr-diff (- (car body) ptr-loc)) (tail-expr (cond ((zero? ptr-diff) #f) ((< ptr-diff 0) (LT (abs ptr-diff))) ((> ptr-diff 0) (RT ptr-diff)))) (loop-exprs (reverse (if tail-expr (cons tail-expr (cdr body)) (cdr body))))) (parse (eat-loop-body tokens 1) ptr-loc (cons (LOOP loop-exprs ptr-loc) acc)))))) (reverse (cdr (parse token-list 0 ()))))) ;; Munches down characters in the given list until the loop ;; depth is zero. ;; ;; eat-loop-body : (list of characters) number -> (list of characters) (define (eat-loop-body tokens depth) (if (zero? depth) tokens (case (peek tokens) ((#f) (error "unclosed loop")) ((#\[) (eat-loop-body (cdr tokens) (add1 depth))) ((#\]) (eat-loop-body (cdr tokens) (sub1 depth))) (else (eat-loop-body (cdr tokens) depth))))) )