/*
 * Lexer and Parser for a very simple language
 *
 * Behrooz Badii and Stephen A. Edwards
 */

//////////////////////////////////////////////////////////////////////
//
// Lexer: turns the character stream into the tokens used by SimpParser.
// Operators and the semicolon get dedicated token types; identifiers
// and parentheses are checked against the literals table so that the
// quoted literals used in the parser ("if", "then", "else", "print",
// "(" and ")") are recognized.

class SimpLexer extends Lexer;

options {
  testLiterals = false;          // By default, don't check tokens against keywords
  k = 2;                         // Need to decide when string literals end
  charVocabulary = '\3'..'\377'; // Accept the eight-bit characters \3 through \377
}

// Single-character operators and punctuation.
PLUS   : '+' ;
MINUS  : '-' ;
TIMES  : '*' ;
DIV    : '/' ;
ASSIGN : '=' ;
SEMI   : ';' ;

// A little unorthodox: most punctuation characters get their own rule,
// but since we're using "(" and ")" in the parser, we need parentheses
// to match as keywords. Thus, we set testLiterals true for this rule.
PARENS
options { testLiterals = true; }
  : '('
  | ')'
  ;

// Helper rules: "protected" rules are only invoked from other lexer
// rules and never produce tokens of their own.
protected LETTER : ( 'a'..'z' | 'A'..'Z' ) ;

protected DIGIT : '0'..'9' ;

// Identifiers start with a letter and continue with letters, digits,
// or underscores.  testLiterals is enabled so that keywords written as
// identifiers are reported as their literal token types instead of ID.
ID
options { testLiterals = true; }
  : LETTER (LETTER | DIGIT | '_')*
  ;

// Unsigned decimal integers: one or more digits.
NUMBER : (DIGIT)+ ;

// Strings are "like this ""double quotes"" doubled to include them"
// Note that testLiterals are false so we don't have to worry about
// strings such as "if"
//
// The "!" suffixes drop the enclosing quotes, and the second quote of
// each doubled pair, from the token text.  A newline inside a string
// stays part of the text, but it is matched by its own alternative so
// that newline() is called and the line count stays correct for the
// tokens that follow a multi-line string.
STRING
  : '"'!
    ( '"' '"'!
    | '\n' { newline(); }
    | ~('"' | '\n')
    )*
    '"'!
  ;

// Whitespace is matched one character at a time and skipped, so it is
// never handed to the parser.  Only '\n' advances the line counter.
WS
  : ( ' '
    | '\t'
    | '\n' { newline(); }
    | '\r'
    )
    { $setType(Token.SKIP); }
  ;

//////////////////////////////////////////////////////////////////////
//
// Parser: builds an AST for a sequence of semicolon-terminated
// expressions.  A "^" suffix makes a token the root of the subtree
// being built; a "!" suffix leaves the token out of the tree.

class SimpParser extends Parser;

options {
  buildAST = true; // Enable AST building
  k = 2;           // Need to distinguish between ID by itself and ID ASSIGN
}

// Imaginary token used only as the root node of the whole program.
tokens {
  STATEMENTS;
}

// A file is one or more expressions, each terminated by a semicolon.
// The resulting trees are gathered as children of one STATEMENTS node.
file
  : (expr SEMI!)+ EOF!
    { #file = #([STATEMENTS],file); }
  ;

// Top-level expression forms.  The greedy option on the optional
// "else" part binds an "else" to the nearest "if".
expr
  : "if"^ expr "then"! expr
      (options {greedy=true;} : "else"! expr)?
  | "print"^ (STRING | expr)
  | ID ASSIGN^ expr
  | expr1
  ;

// Additive level: a chain of "+" and "-" over expr2 operands.
expr1
  : expr2 ( (PLUS^ | MINUS^) expr2 )*
  ;

// Multiplicative level: a chain of "*" and "/" over expr3 operands.
expr2
  : expr3 ( (TIMES^ | DIV^) expr3 )*
  ;

// Atoms: identifiers, parenthesized expressions, numbers, and unary
// minus.  Unary minus yields a MINUS root with a single child.
expr3
  : ID
  | "("! expr ")"!
  | NUMBER
  | MINUS^ expr3
  ;