;; ** * * * * * * * * * * * * * * * * ** ;; ** Start of the AP-F Parser... ** ;; ** * * * * * * * * * * * * * * * * ** (module apf-parse mzscheme (require (lib "etc.ss") (lib "list.ss") (lib "lex.ss" "parser-tools") (prefix : (lib "lex-sre.ss" "parser-tools")) "list-util.scm") (provide gen-parse add-keywords! (struct sum-rule (sym goesto)) (struct prod-rule (sym goesto))) ;; End-Of-File Token (define-struct EOF ()) ;; General Token (wrapped string) (define-struct tok-string (lexeme)) ;; Lexer Abbreviations (define-lex-abbrevs (lower-letter (:/ "a" "z")) (upper-letter (:/ #\A #\Z)) (digit (:/ "0" "9"))) ;; Keywords list, (listof string) (define KEYWORDS '()) (define (add-keywords! kws) (set! KEYWORDS (append kws KEYWORDS))) (define (check-keyword str othrws) (if (ormap (lambda (s) (string=? str s)) KEYWORDS) (make-tok-string str) othrws)) ;; Simple Lexer... Could add String constants as well (define the-lexer (lexer [(eof) (make-EOF)] ;;[(union ,keys) (make-tok-string lexeme)] [(:or #\tab #\space #\newline #\linefeed) (the-lexer input-port)] [(:or "=" "+" "-" "*" "/" "(" ")" "{" "}" "<" ">" "^" "&" "\\" ".") (make-tok-string lexeme)] [(:: "t" "r" "u" "e") (check-keyword lexeme #t)] [(:: "f" "a" "l" "s" "e") (check-keyword lexeme #f)] [(:: "'" (char-complement "'") "'") (cadr (string->list lexeme))] [(:: (:+ (:or lower-letter upper-letter)) (:* (:or lower-letter upper-letter digit))) (check-keyword lexeme (string->symbol lexeme))] [(:+ digit) (string->number lexeme)] [(:: "\"" (:+ (char-complement "\"")) "\"") lexeme] [(:: (:+ digit) #\. (:* digit)) (string->number lexeme)])) ;;** A Rule is either: ;; -- Prod-Rule, or ;; -- Sum-Rule ;; Prod-Rule is (make-prod-rule Symbol (listof Symbol-Or-String)) (define-struct prod-rule (sym goesto)) ;; Sum-Rule is (make-sum-rule Symbol (listof Symbol)) (define-struct sum-rule (sym goesto)) ;; rule-sym: Rule -> Symbol ;; Get the name of a Rule (define (rule-sym r) (cond [(prod-rule? r) (prod-rule-sym r)] [(sum-rule? r) (sum-rule-sym r)] [else (error 'bad "BAD: ~a" r)])) ;; The EmptySet == the EmptyList (define empty-set '()) ;; set-single: Any -> Set ;; Make a singleton Set (define (set-single it) (list it)) ;; set-contains?: Set Any -> Boolean ;; Does the given Set contain the given element? (define (set-contains? set it) (if (null? set) #f (or (equal? (car set) it) (set-contains? (cdr set) it)))) ;; set-union: Set Set -> Set ;; Compute the Union of two Sets (define (set-union set1 set2) (cond [(null? set1) set2] [(null? set2) set1] [(set-contains? set2 (car set1)) (set-union (cdr set1) set2)] [else (cons (car set1) (set-union (cdr set1) set2))])) ;; car-set-rules: Symbol (listof Rules) (listof Rules) -> Set ;; car set of a rule without 'empty's (define (car-set-rules sym lor glor) (if (null? lor) empty-set (let ((r (car lor))) (if (symbol=? (rule-sym r) sym) (cond [(prod-rule? r) (car-set sym (prod-rule-goesto r) glor)] [(sum-rule? r) (foldl (lambda (s set) (set-union (car-set-rules s glor glor) set)) empty-set (sum-rule-goesto r))]) ;; Keep looking... (car-set-rules sym (cdr lor) glor))))) ;; car-set: List-Of-Token Set-of-String -> Set-of-String ;; car set without 'empty's (define (car-set s lot lor) (if (null? lot) (error 'car-set "Cannot Be Empty : ~a" s) (let ((f (car lot))) (cond [(string? f) (set-single f)] [(symbol? f) (cond [(or (symbol=? f 'number) (symbol=? f 'string) (symbol=? f 'char) (symbol=? f 'symbol) (symbol=? f 'boolean)) (set-single f)] [else (car-set-rules (car lot) lor lor)])])))) ;; grammar-syms: (listof Rule) -> (listof Symbol) ;; Return just the (define (grammar-syms lor) (foldl (lambda (r s) (set-union (set-single (rule-sym r)) s)) empty-set lor)) ;; all-car-sets: (listof Rules) -> (listof (listof Symbol Set)) ;; Compute all the car-sets of all the Rules (define (all-car-sets lor) (append (list (list 'number (set-single 'number)) (list 'string (set-single 'string)) (list 'boolean (set-single 'boolean)) (list 'symbol (set-single 'symbol)) (list 'char (set-single 'char))) (map (lambda (r) (let ((s (rule-sym r))) (list s (car-set-rules s lor lor)))) lor))) ;; type-of: Any -> Symbol ;; Return the TypeSymbol of a Token (define (type-of a) (cond [(symbol? a) 'symbol] [(number? a) 'number] [(string? a) 'string] [(char? a) 'char] [(boolean? a) 'boolean] [(EOF? a) 'EOF] [else 'unknown])) ;; current-token: Any ;; The Current input Token... '() if none (define current-tokens '()) ;; peek-token: Lexer -> Any ;; Peek at the car token of the input, don't remove it (define (peek-tokens lex off) (if (<= (length current-tokens) off) (set! current-tokens (append current-tokens (read-tokens lex (add1 (- off (length current-tokens))))))) (list-ref current-tokens off)) ;; read-tokens: Lexer Number -> (listof Any) ;; Read a listof tokens for later lookup... (define (read-tokens lex n) (if (= n 0) '() (cons (lex) (read-tokens lex (sub1 n))))) ;; next-token: Lexer -> Any ;; Get and remove the next Token from the input (define (next-token lex) (if (null? current-tokens) (lex) (let ((tmp (first current-tokens))) (set! current-tokens (rest current-tokens)) tmp))) ;; make-the-lexer: InputPort -> Lexer ;; Make a Lexer for the given InputPort (define (make-the-lexer port) (lambda () (the-lexer port))) ;; gen-parse: (listof Rule) Symbol InputPort -> Any ;; Parse an InputPort using the given Rules, starting with the given Symbol (define (gen-parse lor start port) (let* ((cars (all-car-sets lor))) ;(map (lambda (c) (display c)(newline)) cars) (let* ((lexer (make-the-lexer port)) (result (parse-sym lor cars start lexer))) (begin (parse-sym lor cars 'EOF lexer) result)))) ;; parse-sym: (listof Rule) (listof carSets) Symbol Lexer -> Any ;; Parse a specific Type (Symbol) (define (parse-sym lor cars sym lexer) (cond [(or (symbol=? sym 'number) (symbol=? sym 'symbol) (symbol=? sym 'string) (symbol=? sym 'boolean) (symbol=? sym 'char) (symbol=? sym 'EOF)) (let* ((tok (next-token lexer)) (tt (type-of tok))) (if (not (symbol=? sym tt)) (error 'parser " Expected <~a> got <~a> ('~a')!!" sym tt tok) tok))] [else (let* ((r (find-rule sym lor)) (name (rule-sym r))) (cond [(prod-rule? r) (parse-prod lor cars name (prod-rule-goesto r) lexer)] [(sum-rule? r) (parse-choice lor cars name (sum-rule-goesto r) (peek-tokens lexer 0) lexer)]))])) ;; is-a-first?: Token Symbol (listof FirstSets) -> Boolean (define (is-a-first? tok choice firsts) ;;(map (lambda (a) (display a)(newline)) firsts)(newline)(newline) (let* ((fst (find-first choice firsts)) (res (or (set-contains? fst (type-of tok)) (and (tok-string? tok) (set-contains? fst (tok-string-lexeme tok)))))) res)) ;; find-first: Symbol (listof FirstSets) -> (listof String-Or-Symbol) ;; Find the FirstSets for the given Symbol (define (find-first sym lof) (cond [(null? lof) (error 'find-first "First Not Found For <~a>" sym)] [(symbol=? sym (caar lof)) (cadar lof)] [else (find-first sym (cdr lof))])) ;; parse-choice: (listof Rule) (listof FirstSets) Symbol (listof Symbol) Any Lexer -> Any ;; Parse a Union (sum type), choose a concrete type based on the given FirstSets (define (parse-choice lor firsts name choices tok lexer) (let ((matches (filter (lambda (c) (is-a-first? tok c firsts)) choices))) (cond [(empty? matches) (error 'parse-choice "No Choice Found For <~a> Starting with \"~a\" <~a>" name (if (tok-string? tok) (tok-string-lexeme tok) tok) (type-of tok))] [(= (length matches) 1) (parse-sym lor firsts (first matches) lexer)] [else (let ((mrules (map (lambda (s) (find-rule s lor)) matches))) (if (ormap sum-rule? mrules) (error 'parse-choice "More than one (non-product) Match!! ~a" matches) (look-ahead lor firsts name mrules 0 tok lexer)))]))) (define (look-ahead lor firsts name prules tok-num tok lexer) (let* ((matches (filter (lambda (r) (let ((gt (prod-rule-goesto r))) (if (<= (length gt) tok-num) false (let ((gt-tok (list-ref gt tok-num))) (if (string? gt-tok) (check-strings gt-tok tok) (is-a-first? tok gt-tok firsts)))))) prules)) (ntok-num (add1 tok-num))) (cond [(empty? matches) (error 'parse-choice "No Choice For <~a> With ~a Token of \"~a\" <~a>" name tok-num (if (tok-string? tok) (tok-string-lexeme tok) tok) (type-of tok))] [(= (length matches) 1) (parse-prod lor firsts (prod-rule-sym (first matches)) (prod-rule-goesto (first matches)) lexer)] [else (look-ahead lor firsts name matches ntok-num (peek-tokens lexer ntok-num) lexer)]))) ;; atom->string: Atom -> String (define (atom->string a) (cond [(string? a) a] [(number? a) (number->string a)] [(boolean? a) (if a "true" "false")] [(symbol? a) (symbol->string a)] [(tok-string? a) (tok-string-lexeme a)] [else (error 'atom->string "Unhandled Atom: ~a <~a>" a (type-of a))])) ;; check-strings: String String-or-Symbol -> Boolean ;; See if the given string matches the given symbol/string for parsing to ;; be able to continue. (define (check-strings lit tok) (string=? lit (atom->string tok))) ;; find-rule: Symbol (listof Rule) -> Rule ;; Find the Parse rule for the given Symbol (define (find-rule sym lor) (cond [(null? lor) (error 'find-rule "Rule Not Found For <~a>" sym)] [(symbol=? (rule-sym (car lor)) sym) (car lor)] [else (find-rule sym (cdr lor))])) ;; creator-name: Symbol -> Symbol ;; What is the symbol for the creator of the given Type? (define (creator-name sym) (string->symbol (string-append "make-" (symbol->string sym)))) ;; parse-terminal: String Lexer -> Void ;; Parse a Terminal (String) by ignoring it... (define (parse-terminal str lexer) (let ((tok (next-token lexer))) (if (not (or (and (string? tok) (string=? tok str)) (string=? str (atom->string tok)))) (error 'parse-terminal "Expected String \"~a\" Found \"~a\"" str tok)))) ;; parse-prod: (listof Rule) (listof carSets) Symbol (listof String-Or-Symbol) Lexer -> Any ;; Parse a Product Rule for 'name', with the body 'goesto' (define (parse-prod lor cars name goesto lexer) (apply (eval (creator-name name)) (reverse (foldl (lambda (tt lst) (cond [(symbol? tt) (cons (parse-sym lor cars tt lexer) lst)] [(string? tt) (begin (parse-terminal tt lexer) lst)] [else (error 'parse-rule "Bad Parsable <~a> For ~a" tt name)])) '() goesto)))) )