Parsing prefix expressions P.J. Drongowski 6 November 2004 Arithmetic expressions * Can be written in three forms: + Infix: Conventional mathematical notation - Operators are written between operands - Parentheses create subexpressions - Operator precedence decides order of evaluation + Postfix: Reverse Polish Notation (RPN) - Operators are written after operands - No parentheses or operator precedence are required + Prefix: Like a reverse RPN - Operators are written before operands - No parentheses or operator precedence are required + Parenthesized prefix expressions - Similar to "functional" notation - Parentheses group operands for the preceding operator Parenthesized prefix expressions * Resemble function calls * Are easy to parse using recursive descent + Functions implement production (rules) + Parse (recognition of expressions) is top-down + Build expression tree from bottom-up + Each function returns a pointer to its node/subtree expression ::= '(' binary_expression ')' | '(' unary_expression ')' | operand binary_expression ::= binop expression expression unary_expression ::= unop expression binop ::= 'add' | 'sub' | 'mul' | 'div' | 'mod' unop ::= 'abs' | 'neg' operand ::= NUMBER | IDENTIFIER Examples (abs 10) (add 10 34) (add abc xyz) (add (mul 30 20) 30) (add (abs 10) (mul 30 (sub 10 xyz))) (abs (neg (abs (neg 10)))) (sqrt (add((sub x2 x1) (sub y2 y1)))) (div (add (min max)) 2) ********************************************************* prefix_tree.h ********************************************************* #ifndef INFIX_H #define INFIX_H #include "lexan.h" const int SYMBOL_SIZE = 63 ; enum NodeTag { BINARY_OPERATOR, UNARY_OPERATOR, NUMBER_OPERAND, NAME_OPERAND } ; class Node { friend class Prefix ; public: Node() { symbol = NULL ; left = right = NULL ; } private: enum NodeTag tag ; // Kind of node char *symbol ; // Symbol (op name, etc.) int value ; // Integer value (binary) Node *left ; // Left expression subtree Node *right ; // Right expression subtree } ; typedef Node *PtrNode ; class Prefix { private: PtrNode tree ; // Expression tree PtrNode operand() ; // Build an operand int isBinop() ; // Return true if binary op PtrNode binary_expression() ; // Build binary expression int isUnop() ; // Return true if unary op PtrNode unary_expression() ; // Build unary expression PtrNode expression() ; // Build prefix expression void inner_preorder(PtrNode subtree) ; LexicalAnalyzer la ; // Lexical analyzer public: bool open(char *filename) ; // Open the input file void close() ; // Close the input file bool parse() ; // Parse the input file void preorder() ; // Preorder traversal } ; #endif ********************************************************* prefix_tree.cpp ********************************************************* #include #include using namespace std ; #include "prefix_tree.h" // Set TRACE_FLAG to true to trace terms // as they are successfully recognized const int TRACE_FLAG = 1 ; void success(char *term) { if (TRACE_FLAG) { cout << "Recognized " << term << endl ; } } // Function: open // Purpose: Open input file and initialize lexical analyzer // Parameters: None // Returns: True if input file was successfully opened bool Prefix::open(char *filename) { return( la.open(filename) ) ; } // Function: close // Purpose: Close input file // Parameters: None void Prefix::close() { la.close() ; } // Function: parse // Purpose: Read and recognize an infix expression // Parameters: None // Returns: True if parse succeeded, else false bool Prefix::parse() { tree = expression() ; if (tree == NULL) { cerr << "Parse failed\n" ; return( false ) ; } return( true ) ; } // Function: operand // Purpose: Recognize an operand. An operand is either // a number or an identifier. By convention, start the // parse with the current token. Return true if the // parse was successful (an operand was recognized.) PtrNode Prefix::operand() { PtrNode node ; char name[256] ; if (la.isNumber()) { success("NUMBER") ; node = new Node ; node->tag = NUMBER_OPERAND ; node->value = la.getNumber() ; la.advance() ; return( node ) ; } else if (la.isIdentifier()) { success("IDENTIFIER") ; node = new Node ; node->tag = NAME_OPERAND ; la.getIdentifier(name) ; node->symbol = new char[strlen(name)+1] ; strcpy(node->symbol, name) ; la.advance() ; return( node ) ; } cerr << "Operand expected" << endl ; la.advance() ; return( NULL ) ; } // Function: binary_expression // Purpose: Recognize a binary expression, i.e., an // expression of the form: binop expression expression // Binops are add, sub, mul, div, rem. // Return true if a binary_expression was recognized. int Prefix::isBinop() { return( la.matches("add") || la.matches("sub") || la.matches("mul") || la.matches("div") || la.matches("rem") ) ; } PtrNode Prefix::binary_expression() { PtrNode node, lexpr, rexpr ; char op[32] ; if (la.isIdentifier() && isBinop()) { la.getIdentifier(op) ; la.advance() ; if ((lexpr = expression()) && (rexpr = expression())) { success("binary_expression") ; node = new Node ; node->tag = BINARY_OPERATOR ; node->symbol = new char[strlen(op)+1] ; strcpy(node->symbol, op) ; node->left = lexpr ; node->right = rexpr ; return( node ) ; } } return( NULL ) ; } // Function: unary_expression // Purpose: Recognize a unary expression, i.e., an // expression of the form: unop expression expression // Unops are abs, neg. Enter this function after an // open parenthesis has been recognized and an operator // is expected next. // Return true if a binary_expression was recognized. int Prefix::isUnop() { return( la.matches("abs") || la.matches("neg") || la.matches("sqrt") ) ; } PtrNode Prefix::unary_expression() { PtrNode node, expr ; char op[32] ; if (la.isIdentifier() && isUnop()) { la.getIdentifier(op) ; la.advance() ; if ((expr = expression()) == NULL) return( NULL ) ; success("unary_expression") ; node = new Node ; node->tag = UNARY_OPERATOR ; node->symbol = new char[strlen(op)+1] ; strcpy(node->symbol, op) ; node->right = expr ; return( node ) ; } return( NULL ) ; } // Function: expression // Purpose: Recognize an expression. An expression is // either a unary expression, a binary expression or // an operand. The start of a binary or unary expression // is detected by an open parenthesis. Recognize any // close parenthesis as needed. PtrNode Prefix::expression() { PtrNode expr ; if (la.isDelimiter() && la.matches("(")) { cout<<"Open paren\n" ; la.advance() ; if (((expr = unary_expression()) != NULL) || ((expr = binary_expression()) != NULL)) { // Successfully recognized inner expression } else { return( NULL ) ; } if (la.isDelimiter() && la.matches(")")) { la.advance() ; success("expression") ; return( expr ) ; } cerr << "Unbalanced open parenthesis?\n" ; return( NULL ) ; } if ((expr = operand()) != NULL) { success("operand") ; return( expr ) ; } cerr << "Expression or operand expected\n" ; return( NULL ) ; } // Function: preorder // Purpose: Display the expression tree by performing // a preorder traversal and display info on each node // as appropriate. void Prefix::inner_preorder(PtrNode subtree) { if (subtree == NULL) return ; switch(subtree->tag) { case BINARY_OPERATOR: { cout << "(" << subtree->symbol << " " ; inner_preorder(subtree->left) ; cout << " " ; inner_preorder(subtree->right) ; cout << ")" ; break ; } case UNARY_OPERATOR: { cout << "(" << subtree->symbol << " " ; inner_preorder(subtree->right) ; cout << ")" ; break ; } case NUMBER_OPERAND: { cout << subtree->value ; break ; } case NAME_OPERAND: { cout << subtree->symbol ; break ; } } } void Prefix::preorder() { cout << endl << "Preorder traversal: " << endl ; inner_preorder(tree) ; cout << endl ; } ********************************************************* prefix_tree_main.cpp ********************************************************* #include #include "prefix_tree.h" int main(int argc, char *argv[]) { Prefix prefix ; if (argc != 2) { cerr << "Usage: prefix expr_file\n" ; return( 0 ) ; } prefix.open( argv[1] ) ; prefix.parse() ; prefix.preorder() ; prefix.close() ; }