/*
 * Lexer and Parser for a very simple language
 *
 * Behrooz Badii and Stephen A. Edwards
 */

//////////////////////////////////////////////////////////////////////
class SimpLexer extends Lexer;
options {
    testLiterals = false; // By default, don't check tokens against keywords
    k = 2;                // Two characters are needed to tell a doubled quote
                          // ("") inside a string from the closing quote
    charVocabulary = '\3'..'\377'; // Accept eight-bit characters \3 through \377
}

// Single-character operators and punctuation: one token type per character.
PLUS   : '+' ;
MINUS  : '-' ;
TIMES  : '*' ;
DIV    : '/' ;
ASSIGN : '=' ;
SEMI   : ';' ;

// A little unorthodox: most punctuation characters get their own rule,
// but since the parser refers to "(" and ")" as string literals, the
// parentheses have to be matched against the literals table.  Thus, we
// set testLiterals true for this rule.
PARENS
options {
    testLiterals = true;
}
    : '(' | ')' ;

// Helper rules: "protected" means they are only invoked from other lexer
// rules and never produce tokens of their own.
protected LETTER : ( 'a'..'z' | 'A'..'Z' ) ;
protected DIGIT  : '0'..'9' ;

// Identifiers start with a letter and continue with letters, digits, or
// underscores.  testLiterals is true so that text matching a string literal
// used in the parser (e.g. "if") gets that literal's token type, not ID.
ID
options {
    testLiterals = true;
}
    : LETTER (LETTER | DIGIT | '_')* ;

// Unsigned decimal integers: one or more digits.
NUMBER : (DIGIT)+;

// Strings are "like this ""double quotes"" doubled to include them"
// Note that testLiterals are false so we don't have to worry about
// strings such as "if"
//
// The "!" suffixes drop the surrounding quotes and the second quote of each
// doubled pair from the token text.  A newline inside a string is kept in
// the text, but is matched by its own alternative so that newline() keeps
// the scanner's line count correct for the tokens that follow the string.
STRING
    : '"'!
      ( '"' '"'!
      | '\n' { newline(); }
      | ~('"' | '\n')
      )*
      '"'!
    ;

// Whitespace is matched and then discarded (Token.SKIP); each '\n' also
// advances the scanner's line counter.
WS  :   ( ' '
        | '\t'
        | '\n' { newline(); }
        | '\r'
        ) { $setType(Token.SKIP); }
    ;

//////////////////////////////////////////////////////////////////////
class SimpParser extends Parser;
options {
    k = 2;           // Two tokens of lookahead separate "ID ASSIGN ..." from a lone ID
    buildAST = true; // Have the generated parser construct an AST
}

// STATEMENTS is declared here rather than by a lexer rule; it is used
// only as the type of the root node built in the "file" rule.
tokens {
    STATEMENTS;
}

// file: one or more expressions, each terminated by a semicolon, then
// end of input.  SEMI and EOF are left out of the tree ("!"), and the
// resulting list of expression subtrees is placed under a single
// STATEMENTS root.
file
    : ( expr SEMI! )+
      EOF!
        { #file = #([STATEMENTS],file); }
    ;

// expr: the statement-like forms.  A "^" suffix makes that token the root
// of the subtree for its alternative; a "!" suffix omits the token.
//   - if/then with an optional else; the greedy option attaches an
//     "else" to the nearest enclosing "if"
//   - print of either a string literal or an expression
//   - assignment to an identifier
//   - otherwise, an arithmetic expression
expr
    : "if"^ expr "then"! expr
        ( options {greedy=true;} : "else"! expr )?
    | "print"^
        ( STRING
        | expr
        )
    | ID ASSIGN^ expr
    | expr1
    ;

// expr1: additive level; each operator becomes the root over everything
// parsed so far at this level and its right operand (left-associative).
expr1
    : expr2
      ( PLUS^  expr2
      | MINUS^ expr2
      )*
    ;

// expr2: multiplicative level, built the same way as expr1.
expr2
    : expr3
      ( TIMES^ expr3
      | DIV^   expr3
      )*
    ;

// expr3: operands: an identifier, a parenthesized expression (the
// parentheses are omitted from the tree), a number, or a unary minus
// applied to another operand.
expr3
    : ID
    | "("! expr ")"!
    | NUMBER
    | MINUS^ expr3
    ;