;; ** * * * * * * * * * * * * * * * * ** 
;; **  Start of the AP-F Parser...    **
;; ** * * * * * * * * * * * * * * * * **

(module apf-parse mzscheme
  
  (require (lib "etc.ss")
           (lib "list.ss")
           (lib "lex.ss" "parser-tools")
           (prefix : (lib "lex-sre.ss" "parser-tools"))
           "list-util.scm")
  
  (provide gen-parse
           add-keywords!
           (struct sum-rule (sym goesto))
           (struct prod-rule (sym goesto)))
  
  ;; End-Of-File Token
  (define-struct EOF ())
  ;; General Token (wrapped string)
  (define-struct tok-string (lexeme))
  
  ;; Lexer Abbreviations
  (define-lex-abbrevs
    (lower-letter (:/ "a" "z"))
    (upper-letter (:/ #\A #\Z))
    (digit (:/ "0" "9")))
  
  ;; Keywords list, (listof string)
  (define KEYWORDS '())
  
  (define (add-keywords! kws)
    (set! KEYWORDS (append kws KEYWORDS)))
  
  (define (check-keyword str othrws)
    (if (ormap (lambda (s) (string=? str s)) KEYWORDS)
        (make-tok-string str)
        othrws))
  
  ;; Simple Lexer... Could add String constants as well
  (define the-lexer
    (lexer
     [(eof) (make-EOF)]
     ;;[(union ,keys) (make-tok-string lexeme)]
     [(:or #\tab #\space #\newline #\linefeed) (the-lexer input-port)]
     [(:or "=" "+" "-" "*" "/" "(" ")" "{" "}" "<" ">" "^" "&" "\\" ".")
      (make-tok-string lexeme)]
     [(:: "t" "r" "u" "e") (check-keyword lexeme #t)]
     [(:: "f" "a" "l" "s" "e") (check-keyword lexeme #f)]
     [(:: "'" (char-complement "'") "'") (cadr (string->list lexeme))]
     [(:: (:+ (:or lower-letter upper-letter))
          (:* (:or lower-letter upper-letter digit)))
      (check-keyword lexeme (string->symbol lexeme))]
     [(:+ digit) (string->number lexeme)]
     [(:: "\"" (:+ (char-complement "\"")) "\"") lexeme]
     [(:: (:+ digit) #\. (:* digit)) (string->number lexeme)]))


  ;;** A Rule is either:
  ;;     -- Prod-Rule, or
  ;;     -- Sum-Rule
  
  ;; Prod-Rule is (make-prod-rule Symbol (listof Symbol-Or-String))
  (define-struct prod-rule (sym goesto))
  ;; Sum-Rule is (make-sum-rule Symbol (listof Symbol))
  (define-struct sum-rule (sym goesto))
  
  ;; rule-sym: Rule -> Symbol
  ;; Get the name of a Rule
  (define (rule-sym r)
    (cond [(prod-rule? r) (prod-rule-sym r)]
          [(sum-rule? r) (sum-rule-sym r)]
          [else (error 'bad "BAD: ~a" r)]))
  
  ;; The EmptySet == the EmptyList
  (define empty-set '())
  
  ;; set-single: Any -> Set
  ;; Make a singleton Set
  (define (set-single it) (list it))
  
  ;; set-contains?: Set Any -> Boolean
  ;; Does the given Set contain the given element?
  (define (set-contains? set it)
    (if (null? set) #f
        (or (equal? (car set) it)
            (set-contains? (cdr set) it))))
  
  ;; set-union: Set Set -> Set
  ;; Compute the Union of two Sets
  (define (set-union set1 set2)
    (cond [(null? set1) set2]
          [(null? set2) set1]
          [(set-contains? set2 (car set1)) (set-union (cdr set1) set2)]
          [else (cons (car set1) (set-union (cdr set1) set2))]))
  
  ;; car-set-rules: Symbol (listof Rules) (listof Rules) -> Set
  ;; car set of a rule without 'empty's
  (define (car-set-rules sym lor glor)
    (if (null? lor) empty-set
        (let ((r (car lor)))
          (if (symbol=? (rule-sym r) sym)
              (cond [(prod-rule? r) (car-set sym (prod-rule-goesto r) glor)]
                    [(sum-rule? r) 
                     (foldl (lambda (s set)
                              (set-union (car-set-rules s glor glor)
                                         set))
                            empty-set
                            (sum-rule-goesto r))])
              ;; Keep looking...
              (car-set-rules sym (cdr lor) glor)))))
  
  ;; car-set: List-Of-Token Set-of-String -> Set-of-String
  ;; car set without 'empty's
  (define (car-set s lot lor)
    (if (null? lot) (error 'car-set "Cannot Be Empty : ~a" s)
        (let ((f (car lot)))
          (cond [(string? f) (set-single f)]
                [(symbol? f)
                 (cond [(or (symbol=? f 'number)
                            (symbol=? f 'string)
                            (symbol=? f 'char)
                            (symbol=? f 'symbol)
                            (symbol=? f 'boolean)) (set-single f)]
                       [else (car-set-rules (car lot) lor lor)])]))))
  
  ;; grammar-syms: (listof Rule) -> (listof Symbol)
  ;; Return just the 
  (define (grammar-syms lor)
    (foldl (lambda (r s) (set-union (set-single (rule-sym r)) s)) empty-set lor))
  
  ;; all-car-sets: (listof Rules) -> (listof (listof Symbol Set))
  ;; Compute all the car-sets of all the Rules
  (define (all-car-sets lor)
    (append (list (list 'number (set-single 'number))
                  (list 'string (set-single 'string))
                  (list 'boolean (set-single 'boolean))
                  (list 'symbol (set-single 'symbol))
                  (list 'char (set-single 'char)))
            (map (lambda (r) 
                   (let ((s (rule-sym r)))
                     (list s (car-set-rules s lor lor)))) lor)))
  
  ;; type-of: Any -> Symbol
  ;; Return the TypeSymbol of a Token
  (define (type-of a)
    (cond [(symbol? a) 'symbol]
          [(number? a) 'number]
          [(string? a) 'string]
          [(char? a) 'char]
          [(boolean? a) 'boolean]
          [(EOF? a) 'EOF]
          [else 'unknown]))
  
  ;; current-token: Any
  ;; The Current input Token... '() if none
  (define current-tokens '())
  
  ;; peek-token: Lexer -> Any
  ;; Peek at the car token of the input, don't remove it
  (define (peek-tokens lex off)
    (if (<= (length current-tokens) off)
        (set! current-tokens (append current-tokens 
                                     (read-tokens lex (add1 (- off (length current-tokens)))))))
    (list-ref current-tokens off))
  
  ;; read-tokens: Lexer Number -> (listof Any)
  ;; Read a listof tokens for later lookup...
  (define (read-tokens lex n)
    (if (= n 0) '()
        (cons (lex) (read-tokens lex (sub1 n)))))
        
        
  ;; next-token: Lexer -> Any
  ;; Get and remove the next Token from the input
  (define (next-token lex)
    (if (null? current-tokens) (lex)
        (let ((tmp (first current-tokens)))
          (set! current-tokens (rest current-tokens))
          tmp)))
  
  ;; make-the-lexer: InputPort -> Lexer
  ;; Make a Lexer for the given InputPort
  (define (make-the-lexer port)
    (lambda () (the-lexer port)))
  
  ;; gen-parse: (listof Rule) Symbol InputPort -> Any
  ;; Parse an InputPort using the given Rules, starting with the given Symbol
  (define (gen-parse lor start port)
    (let* ((cars (all-car-sets lor)))
      ;(map (lambda (c) (display c)(newline)) cars)
      (let* ((lexer (make-the-lexer port))
             (result (parse-sym lor cars start lexer)))
        (begin (parse-sym lor cars 'EOF lexer) result))))
  
  ;; parse-sym: (listof Rule) (listof carSets) Symbol Lexer -> Any
  ;; Parse a specific Type (Symbol)
  (define (parse-sym lor cars sym lexer)
    (cond [(or (symbol=? sym 'number) (symbol=? sym 'symbol)
               (symbol=? sym 'string) (symbol=? sym 'boolean)
               (symbol=? sym 'char) (symbol=? sym 'EOF))
           (let* ((tok (next-token lexer))
                  (tt (type-of tok)))
             (if (not (symbol=? sym tt))
                 (error 'parser " Expected <~a> got <~a> ('~a')!!" sym tt tok)
                 tok))]
          [else (let* ((r (find-rule sym lor))
                       (name (rule-sym r)))
                  (cond [(prod-rule? r)
                         (parse-prod lor cars name (prod-rule-goesto r) lexer)]
                        [(sum-rule? r)
                         (parse-choice lor cars name (sum-rule-goesto r)
                                       (peek-tokens lexer 0) lexer)]))]))
  
  
  
  ;; is-a-first?: Token Symbol (listof FirstSets) -> Boolean
  (define (is-a-first? tok choice firsts)
    ;;(map (lambda (a) (display a)(newline)) firsts)(newline)(newline)
    (let* ((fst (find-first choice firsts))
           (res (or (set-contains? fst (type-of tok))
                    (and (tok-string? tok)
                         (set-contains? fst (tok-string-lexeme tok))))))
      res))
  
  ;; find-first: Symbol (listof FirstSets) -> (listof String-Or-Symbol)
  ;; Find the FirstSets for the given Symbol
  (define (find-first sym lof)
    (cond [(null? lof) (error 'find-first "First Not Found For <~a>" sym)]
          [(symbol=? sym (caar lof)) (cadar lof)]
          [else (find-first sym (cdr lof))]))
  
  ;; parse-choice: (listof Rule) (listof FirstSets) Symbol (listof Symbol) Any Lexer -> Any
  ;; Parse a Union (sum type), choose a concrete type based on the given FirstSets
  (define (parse-choice lor firsts name choices tok lexer)
    (let ((matches (filter (lambda (c) (is-a-first? tok c firsts)) choices)))
      (cond [(empty? matches)
             (error 'parse-choice "No Choice Found For <~a> Starting with \"~a\" <~a>"
                    name (if (tok-string? tok) (tok-string-lexeme tok) tok)
                    (type-of tok))]
            [(= (length matches) 1) (parse-sym lor firsts (first matches) lexer)]
            [else (let ((mrules (map (lambda (s) (find-rule s lor)) matches)))
                    (if (ormap sum-rule? mrules)
                        (error 'parse-choice "More than one (non-product) Match!! ~a" matches)
                        (look-ahead lor firsts name mrules 0 tok lexer)))])))
  
  (define (look-ahead lor firsts name prules tok-num tok lexer)
    (let* ((matches (filter (lambda (r)
                              (let ((gt (prod-rule-goesto r)))
                                (if (<= (length gt) tok-num) false
                                    (let ((gt-tok (list-ref gt tok-num)))
                                      (if (string? gt-tok)
                                          (check-strings gt-tok tok)
                                          (is-a-first? tok gt-tok firsts))))))
                            prules))
           (ntok-num (add1 tok-num)))
      (cond [(empty? matches) (error 'parse-choice "No Choice For <~a> With ~a Token of \"~a\" <~a>"
                                     name tok-num (if (tok-string? tok) (tok-string-lexeme tok) tok)
                                     (type-of tok))]
            [(= (length matches) 1)
             (parse-prod lor firsts (prod-rule-sym (first matches))
                         (prod-rule-goesto (first matches)) lexer)]
            [else
             (look-ahead lor firsts name matches ntok-num (peek-tokens lexer ntok-num) lexer)])))
  
  ;; atom->string: Atom -> String
  (define (atom->string a)
    (cond [(string? a) a]
          [(number? a) (number->string a)]
          [(boolean? a) (if a "true" "false")]
          [(symbol? a) (symbol->string a)]
          [(tok-string? a) (tok-string-lexeme a)]
          [else (error 'atom->string "Unhandled Atom: ~a <~a>" a (type-of a))]))
  
  ;; check-strings: String String-or-Symbol -> Boolean
  ;; See if the given string matches the given symbol/string for parsing to
  ;;   be able to continue.
  (define (check-strings lit tok)
    (string=? lit (atom->string tok)))

  
  ;; find-rule: Symbol (listof Rule) -> Rule
  ;; Find the Parse rule for the given Symbol
  (define (find-rule sym lor)
    (cond [(null? lor) (error 'find-rule "Rule Not Found For <~a>" sym)]
          [(symbol=? (rule-sym (car lor)) sym) (car lor)]
          [else (find-rule sym (cdr lor))]))
  
  ;; creator-name: Symbol -> Symbol
  ;; What is the symbol for the creator of the given Type?
  (define (creator-name sym)
    (string->symbol (string-append "make-" (symbol->string sym))))
  
  ;; parse-terminal: String Lexer -> Void
  ;; Parse a Terminal (String) by ignoring it...
  (define (parse-terminal str lexer)
    (let ((tok (next-token lexer)))
      (if (not (or (and (string? tok) (string=? tok str))
                   (string=? str (atom->string tok))))
          (error 'parse-terminal "Expected String \"~a\" Found \"~a\"" str tok))))
  
  ;; parse-prod: (listof Rule) (listof carSets) Symbol (listof String-Or-Symbol) Lexer -> Any
  ;; Parse a Product Rule for 'name', with the body 'goesto'
  (define (parse-prod lor cars name goesto lexer)
    (apply (eval (creator-name name))
           (reverse (foldl (lambda (tt lst)
                             (cond [(symbol? tt) (cons (parse-sym lor cars tt lexer) lst)]
                                   [(string? tt) (begin (parse-terminal tt lexer) lst)]
                                   [else (error 'parse-rule "Bad Parsable <~a> For ~a" tt name)]))
                           '() goesto))))
  )