GNU Bison - A Complete Reentrant Parser Example

A Complete Reentrant Parser Example

The following example shows how to use Bison and flex to write a simple calculator program (only addition and multiplication) and a program for creating an abstract syntax tree. The next two files provide definition and implementation of the syntax tree functions.

/* * Expression.h * Definition of the structure used to build the syntax tree. */ #ifndef __EXPRESSION_H__ #define __EXPRESSION_H__ /** * @brief The operation type */ typedef enum tagEOperationType { eVALUE, eMULTIPLY, ePLUS }EOperationType; /** * @brief The expression structure */ typedef struct tagSExpression { EOperationType type;///< type of operation int value;///< valid only when type is eVALUE struct tagSExpression* left; ///< left side of the tree struct tagSExpression* right;///< right side of the tree }SExpression; /** * @brief It creates an identifier * @param value The number value * @return The expression or NULL in case of no memory */ SExpression* createNumber(int value); /** * @brief It creates an operation * @param type The operation type * @param left The left operand * @param right The right operand * @return The expression or NULL in case of no memory */ SExpression* createOperation( EOperationType type, SExpression *left, SExpression *right); /** * @brief Deletes a expression * @param b The expression */ void deleteExpression(SExpression *b); #endif // __EXPRESSION_H__ /* * Expression.c * Implementation of functions used to build the syntax tree. */ #include "Expression.h" #include /** * @brief Allocates space for expression * @return The expression or NULL if not enough memory */ static SExpression* allocateExpression { SExpression* b = (SExpression *)malloc(sizeof *b); if (b == NULL) return NULL; b->type = eVALUE; b->value = 0; b->left = NULL; b->right = NULL; return b; } SExpression* createNumber(int value) { SExpression* b = allocateExpression; if (b == NULL) return NULL; b->type = eVALUE; b->value = value; return b; } SExpression *createOperation( EOperationType type, SExpression *left, SExpression *right) { SExpression* b = allocateExpression; if (b == NULL) return NULL; b->type = type; b->left = left; b->right = right; return b; } void deleteExpression(SExpression *b) { if (b == NULL) return; deleteExpression(b->left); deleteExpression(b->right); free(b); }

The tokens needed by the Bison parser will be generated using flex.

%{ /* * Lexer.l file * To generate the lexical analyzer run: "flex Lexer.l" */ #include "Expression.h" #include "Parser.h" #include %} %option outfile="Lexer.c" header-file="Lexer.h" %option warn nodefault %option reentrant noyywrap never-interactive nounistd %option bison-bridge LPAREN "(" RPAREN ")" PLUS "+" MULTIPLY "*" NUMBER + WS * %% {WS} { /* Skip blanks. */ } {NUMBER} { sscanf(yytext, "%d", &yylval->value); return TOKEN_NUMBER; } {MULTIPLY} { return TOKEN_MULTIPLY; } {PLUS} { return TOKEN_PLUS; } {LPAREN} { return TOKEN_LPAREN; } {RPAREN} { return TOKEN_RPAREN; } . { } %% int yyerror(const char *msg) { fprintf(stderr,"Error:%s\n",msg); return 0; }

Since the tokens are provided by flex we must provide the means to communicate between the parser and the lexer. The data type used for communication, YYSTYPE, is set using Bison's %union declaration.

Since in this sample we use the reentrant version of both flex and yacc we are forced to provide parameters for the yylex function, when called from yyparse. This is done through Bison's %lex-param and %parse-param declarations.

%{ /* * Parser.y file * To generate the parser run: "bison Parser.y" */ #include "Expression.h" #include "Parser.h" #include "Lexer.h" int yyerror(yyscan_t scanner, SExpression **expression, const char *msg); %} %output "Parser.c" %defines "Parser.h" %define api.pure %lex-param { yyscan_t scanner } %parse-param { SExpression **expression } %parse-param { yyscan_t scanner } %union { int value; SExpression *expression; } %left '+' TOKEN_PLUS %left '*' TOKEN_MULTIPLY %token TOKEN_LPAREN %token TOKEN_RPAREN %token TOKEN_PLUS %token TOKEN_MULTIPLY %token TOKEN_NUMBER %type expr %% input : expr { *expression = $1; } ; expr : expr TOKEN_PLUS expr { $$ = createOperation( ePLUS, $1, $3 ); } | expr TOKEN_MULTIPLY expr { $$ = createOperation( eMULTIPLY, $1, $3 ); } | TOKEN_LPAREN expr TOKEN_RPAREN { $$ = $2; } | TOKEN_NUMBER { $$ = createNumber($1); } ; %%

The code needed to obtain the syntax tree using the parser generated by Bison and the scanner generated by flex is the following.

#include "Expression.h" #include "Parser.h" #include "Lexer.h" #include int yyparse(SExpression **expression, yyscan_t scanner); SExpression *getAST(const char *expr) { SExpression *expression; yyscan_t scanner; YY_BUFFER_STATE state; if (yylex_init(&scanner)) { // couldn't initialize return NULL; } state = yy_scan_string(expr, scanner); if (yyparse(&expression, scanner)) { // error parsing return NULL; } yy_delete_buffer(state, scanner); yylex_destroy(scanner); return expression; } int evaluate(SExpression *e) { switch (e->type) { case eVALUE: return e->value; case eMULTIPLY: return evaluate(e->left) * evaluate(e->right); case ePLUS: return evaluate(e->left) + evaluate(e->right); default: // shouldn't be here return 0; } } int main(void) { SExpression *e = NULL; char test=" 4 + 2*10 + 3*( 5 + 1 )"; int result = 0; e = getAST(test); result = evaluate(e); printf("Result of '%s' is %d\n", test, result); deleteExpression(e); return 0; }

Read more about this topic:  GNU Bison

Famous quotes containing the word complete:

    Seldom, very seldom, does complete truth belong to any human disclosure; seldom can it happen that something is not a little disguised, or a little mistaken.
    Jane Austen (1775–1817)