Implementing a compiler for a C-like language (CMM): using flex for token recognition and bison to build the abstract syntax tree (flex & bison)

Source: Internet
Author: User

Implementing a compiler for a C-like language (CMM): using flex for token recognition and bison to build the abstract syntax tree (flex & bison)

I came across a very interesting project on GitHub. It uses flex to recognize tokens, uses bison to build the abstract syntax tree (AST), and then performs analysis and evaluation directly on the AST.

For example, the lex file is written like this:

/***********************************/
/* File: cmm.l    version 2.0      */
/* Flex version for CMM            */
/* CMM Interpreter Construction    */
/***********************************/
%option noyywrap
%{
/* Project headers; cmm.tab.h is the token/YYSTYPE header that bison -d
   generates from cmm.y, so the scanner and parser agree on token codes. */
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "cmm.tab.h"
%}

/* Named patterns used by the rules below. A flex pattern ends at the first
   unquoted blank, so none of these may contain embedded spaces. */
digit       [0-9]
int_num     {digit}+
/* A real literal needs the decimal point; digits after it are optional. */
real_num    {digit}+"."{digit}*
letter      [a-zA-Z]
/* Starts with a letter, may contain letters, digits and '_', and must end
   in a letter or digit (a trailing '_' is not part of the identifier). */
identifier  {letter}+({letter}|{digit}|_)*({letter}+|{digit}+)|{letter}+
/* One or more blanks/tabs; '+' rather than '*' so the pattern can never
   match the empty string. */
whitespace  [ \t]+

%%

if      { /* Reserved words. They are listed before the identifier rule,
             so on an equal-length match the keyword wins. */
          return IF;
        }
else    { return ELSE; }
while   { return WHILE; }
int     { /* Type keywords also hand their data type to the parser. */
          yylval.dataType = Int;
          return INT;
        }
real    {
          yylval.dataType = Real;
          return REAL;
        }
bool    {
          yylval.dataType = Bool;
          return BOOL;
        }
read    { return READ; }
write   { return WRITE; }
main    { return MAIN; }

{int_num}     { /* Integer literal: the value travels in yylval.intval. */
                yylval.intval = atoi(yytext);
                return INT_VALUE;
              }
{real_num}    { /* Real literal: the value travels in yylval.realval. */
                yylval.realval = atof(yytext);
                return REAL_VALUE;
              }
{identifier}  { /* yytext is reused by the scanner, so the name is copied. */
                yylval.idName = copyString(yytext);
                return ID;
              }
{whitespace}  { /* Do nothing. */ }

"+"     { return PLUS; }
"-"     { return SUB; }
"*"     { return MUL; }
"/"     { return DIV; }
"<"     { /* All comparison and logical operators share the REL_OP token;
             yylval.binaryOperator (1..8) tells the parser which one. */
          yylval.binaryOperator = 1; return REL_OP; }
">"     { yylval.binaryOperator = 2; return REL_OP; }
"<="    { yylval.binaryOperator = 3; return REL_OP; }
">="    { yylval.binaryOperator = 4; return REL_OP; }
"=="    { yylval.binaryOperator = 5; return REL_OP; }
"<>"    { yylval.binaryOperator = 6; return REL_OP; }
"&&"    { yylval.binaryOperator = 7; return REL_OP; }
"||"    { yylval.binaryOperator = 8; return REL_OP; }

=       { /* Single-character punctuation. "==" is still scanned as one
             token because flex prefers the longest match. */
          return ASSIGN;
        }
\(      { return LPAREN; }
\)      { return RPAREN; }
;       { return SEMI; }
\{      { return LBRACE; }
\}      { return RBRACE; }
\[      { return LBRACKET; }
\]      { return RBRACKET; }
,       { return COMMA; }

"//"    { /* Line comment: discard everything up to and including the
             newline. c is an int so the end-of-input value is not
             truncated; depending on the flex version input() reports
             end of input as EOF or as 0, so both are checked. */
          int c;
          do {
              c = input();
          } while (c != '\n' && c != EOF && c != 0);
          /* Count the line only if a newline was actually consumed. */
          if (c == '\n')
              lineno++;
        }
"/*"    { /* Block comment: discard input up to the closing star-slash,
             counting newlines on the way. An unterminated comment
             silently ends at end of input. c is an int, and end of
             input is accepted as either EOF or 0 (flex versions differ). */
          int c;
          int prev = 0;
          for (;;) {
              c = input();
              if (c == EOF || c == 0)
                  break;
              if (c == '\n')
                  lineno++;
              if (prev == '*' && c == '/')
                  break;
              prev = c;
          }
        }
\n      { lineno++; }
.       { /* Any character no other rule accepts. yyerror takes a single
             message string, so the text is formatted here first; yytext
             holds exactly one character for this rule. */
          char msg[64];
          snprintf(msg, sizeof msg, "Mystery character %s", yytext);
          yyerror(msg);
          return ERROR;
        }

%%
/ * Initialize the lexical analysis interface when parsing * /
void iniLexer (void)
{
static int firstTime = TRUE;
lineno = 0;
if (firstTime)
{firstTime = FALSE;
lineno ++;
yyin = source;
yyout = listing;
}
}
/* For Lexical Analyzer
TokenType getToken(void)
{ static int firstTime = TRUE;
  TokenType currentToken;
  if (firstTime)
  { firstTime = FALSE;
    lineno++;
    yyin = source;
    yyout = listing;
  }
  currentToken = yylex();
  strncpy(tokenString, yytext, MAXTOKENLEN);
  if (TraceScan) {
    fprintf(listing, "\t%d:", lineno);
    printToken(currentToken);
  }
  return currentToken;
} */

The yacc file is written like this:

/***********************************/
/* File: cmm.y                     */
/* Bison grammar file about CMM    */
/* CMM Interpreter Construction    */
/***********************************/

/* Expect exactly 1 shift/reduce conflict (presumably the dangling else
   of if_stmt); bison then stays quiet about it. */
%expect 1
%{

#include <stdlib.h>

#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"

/* Root of the syntax tree: set by the `program` rule and handed back to
   the caller by parse(). */
static TreeNode *savedTree;

%}

/* Semantic value carried by tokens and nonterminals. */
%union {
    struct treeNode *node;   /* syntax-tree node built by grammar actions */
    int intval;              /* value of an INT_VALUE literal */
    double realval;          /* value of a REAL_VALUE literal */
    char *idName;            /* name of an ID (copied by the scanner) */
    int binaryOperator;      /* operator code attached to REL_OP */
    int dataType;            /* data type attached to INT / REAL / BOOL */
}
/* Tokens that carry a semantic value name the %union member they use;
   without the <tag>, bison rejects every $n that refers to them. */
%token <intval>   INT_VALUE
%token <realval>  REAL_VALUE
%token <idName>   ID
%token <dataType> INT REAL BOOL

/* Precedence, lowest first. REL_OP sits below the arithmetic operators so
   that "a * b < c" groups as "(a * b) < c". Because it is %nonassoc,
   chains such as "a < b < c" or "a < b && c < d" are syntax errors unless
   parenthesized. */
%right ASSIGN
%nonassoc <binaryOperator> REL_OP
%left PLUS SUB
%left MUL DIV
%nonassoc UMINUS
/* Tokens used in the declaration grammar */
%token IF ELSE WHILE READ WRITE MAIN
%token LPAREN RPAREN SEMI LBRACE RBRACE LBRACKET RBRACKET COMMA
%token ASSIGN
%token NEWLINE ERROR

/* Every statement and expression nonterminal yields a tree node. */
%type <node> stmt_list stmt
%type <node> if_stmt decl_stmt compound_stmt while_stmt assign_stmt read_stmt write_stmt
%type <node> exp factor bin_exp
%type <dataType> type_spec

%start program

%% /* CMM Grammar */

program     : stmt_list
                { /* Keep the finished tree so it can be returned after
                     yyparse() completes. */
                  savedTree = $1;
                }
            ;
stmt_list   : /* empty */
                { $$ = NULL; }
            | stmt_list stmt
                { /* Statements are chained through `sibling`: walk to the
                     end of the existing chain and append the new statement.
                     An empty list so far just becomes the new statement. */
                  TreeNode *t = $1;
                  if (t != NULL) {
                      while (t->sibling != NULL) {
                          t = t->sibling;
                      }
                      t->sibling = $2;
                      $$ = $1;
                  } else {
                      $$ = $2;
                  }
                }
            ;
stmt        : if_stmt               { $$ = $1; }
            | decl_stmt SEMI        { $$ = $1; }
            | compound_stmt         { $$ = $1; }
            | while_stmt            { $$ = $1; }
            | assign_stmt SEMI      { $$ = $1; }
            | read_stmt SEMI        { $$ = $1; }
            | write_stmt SEMI       { $$ = $1; }
            | error                 { /* error recovery: no node is produced */
                                      $$ = NULL; }
            ;
compound_stmt
            : LBRACE stmt_list RBRACE
                { /* child[0] is the (possibly NULL) statement chain
                     enclosed by the braces. */
                  $$ = newStmtNode(CompoundK);
                  $$->child[0] = $2;
                  $$->lineno = lineno;
                }
            ;
decl_stmt   : type_spec ID
                { $$ = newStmtNode(DeclK);
                  $$->attr.name = $2;
                  /* Array length is 0 for non-array */
                  $$->arrayLength = 0;
                  $$->type = $1;
                  $$->lineno = lineno;
                }
            | type_spec ID LBRACKET INT_VALUE RBRACKET
                { $$ = newStmtNode(DeclK);
                  $$->attr.name = $2;
                  $$->arrayLength = $4;
                  $$->type = $1;
                  /* The element storage must outlive this action, so it is
                     heap-allocated (and zero-filled) instead of pointing at
                     a block-local array that dies when the action ends.
                     At least one element is requested so a declared length
                     of 0 still yields a valid pointer. Only Int and Real
                     arrays get storage here.
                     NOTE(review): the node now owns this buffer; release it
                     wherever the syntax tree is destroyed. */
                  if ($$->type == Int) {
                      $$->array.intArray =
                          calloc($4 > 0 ? (size_t)$4 : 1, sizeof(int));
                      if ($$->array.intArray == NULL) {
                          fprintf(listing, "Out of memory at line %d\n", lineno);
                          Error = TRUE;
                      }
                  }
                  else if ($$->type == Real) {
                      $$->array.realArray =
                          calloc($4 > 0 ? (size_t)$4 : 1, sizeof(double));
                      if ($$->array.realArray == NULL) {
                          fprintf(listing, "Out of memory at line %d\n", lineno);
                          Error = TRUE;
                      }
                  }
                  $$->lineno = lineno;
                }
            ;
type_spec   : INT       { /* pass on the data type set by the scanner */
                          $$ = $1; }
            | REAL      { $$ = $1; }
            | BOOL      { $$ = $1; }
            ;
if_stmt     : IF LPAREN bin_exp RPAREN stmt
                { /* child[0] = condition, child[1] = then-branch,
                     child[2] = else-branch (NULL when absent). */
                  $$ = newStmtNode(IfK);
                  $$->child[0] = $3;
                  $$->child[1] = $5;
                  $$->child[2] = NULL;
                  $$->lineno = lineno;
                }
            | IF LPAREN bin_exp RPAREN stmt ELSE stmt
                { $$ = newStmtNode(IfK);
                  $$->child[0] = $3;
                  $$->child[1] = $5;
                  $$->child[2] = $7;
                  $$->lineno = lineno;
                }
            ;
while_stmt  : WHILE LPAREN bin_exp RPAREN stmt
                { /* child[0] = loop condition, child[1] = loop body. */
                  $$ = newStmtNode(WhileK);
                  $$->child[0] = $3;
                  $$->child[1] = $5;
                  $$->lineno = lineno;
                }
            ;
assign_stmt : ID ASSIGN exp
                { /* Scalar assignment: child[0] is the assigned value and
                     child[1] stays NULL. */
                  $$ = newStmtNode(AssignK);
                  $$->attr.name = $1;
                  $$->child[0] = $3;
                  $$->child[1] = NULL;
                  $$->lineno = lineno;
                }
            | ID LBRACKET exp RBRACKET ASSIGN exp
                { $$ = newStmtNode(AssignK);
                  $$->attr.name = $1;
                  /* child[1] is not NULL to indicate that the referenced
                     variable is an array element */
                  /* child[0] is the index of the array element, child[1]
                     the assigned value */
                  $$->child[0] = $3;
                  $$->child[1] = $6;
                  $$->lineno = lineno;
                }
            ;
read_stmt   : READ LPAREN ID RPAREN
                { /* Read into a scalar: child[0] is NULL. */
                  $$ = newStmtNode(ReadK);
                  $$->attr.name = $3;
                  $$->child[0] = NULL;
                  $$->lineno = lineno;
                }
            | READ LPAREN ID LBRACKET exp RBRACKET RPAREN
                { /* Read into an array element: child[0] is the index. */
                  $$ = newStmtNode(ReadK);
                  $$->attr.name = $3;
                  $$->child[0] = $5;
                  $$->lineno = lineno;
                }
            ;
write_stmt  : WRITE LPAREN exp RPAREN
                { /* child[0] is the expression to be written. */
                  $$ = newStmtNode(WriteK);
                  $$->child[0] = $3;
                  $$->lineno = lineno;
                }
            ;
exp         : factor
                { /* an expression is either a single operand ... */
                  $$ = $1; }
            | bin_exp
                { /* ... or an operator applied to sub-expressions */
                  $$ = $1; }
            ;
factor      : INT_VALUE
                { $$ = newExpNode(IntValueK);
                  $$->value.int_val = $1;
                  $$->type = Int;
                  $$->lineno = lineno;
                }
            | REAL_VALUE
                { $$ = newExpNode(RealValueK);
                  $$->value.real_val = $1;
                  $$->type = Real;
                  $$->lineno = lineno;
                }
            | LPAREN exp RPAREN
                { /* parentheses only group; no node of their own */
                  $$ = $2; }
            | ID
                { $$ = newExpNode(IdK);
                  $$->attr.name = $1;
                  /* child[0] is NULL, which means that the variable is
                     not an array. */
                  $$->child[0] = NULL;
                  $$->lineno = lineno;
                }
            | ID LBRACKET exp RBRACKET
                { /* Array element: child[0] is the index expression. */
                  $$ = newExpNode(IdK);
                  $$->attr.name = $1;
                  $$->child[0] = $3;
                  $$->lineno = lineno;
                }
            | error { /* error recovery: no node is produced */
                      $$ = NULL; }
            ;
bin_exp     : /* relational operator: attr.op is the operator code that
                 came with the REL_OP token */
              exp REL_OP exp
                { $$ = newExpNode(OpK);
                  $$->child[0] = $1;
                  $$->child[1] = $3;
                  $$->attr.op = $2;
                  $$->lineno = lineno;
                }
              /* Arithmetic operators: attr.op is the operator's token */
            | exp PLUS exp
                { $$ = newExpNode(OpK);
                  $$->child[0] = $1;
                  $$->child[1] = $3;
                  $$->attr.op = PLUS;
                  $$->lineno = lineno;
                }
            | exp SUB exp
                { $$ = newExpNode(OpK);
                  $$->child[0] = $1;
                  $$->child[1] = $3;
                  $$->attr.op = SUB;
                  $$->lineno = lineno;
                }
            | exp MUL exp
                { $$ = newExpNode(OpK);
                  $$->child[0] = $1;
                  $$->child[1] = $3;
                  $$->attr.op = MUL;
                  $$->lineno = lineno;
                }
            | exp DIV exp
                { $$ = newExpNode(OpK);
                  $$->child[0] = $1;
                  $$->child[1] = $3;
                  $$->attr.op = DIV;
                  $$->lineno = lineno;
                }
              /* Unary minus: one operand in child[0]; %prec gives the rule
                 the precedence of UMINUS instead of that of SUB. */
            | SUB exp %prec UMINUS
                { $$ = newExpNode(OpK);
                  $$->child[0] = $2;
                  $$->child[1] = NULL;
                  $$->attr.op = UMINUS;
                  $$->lineno = lineno;
                }
            ;

%%

int yyerror (char * message)
{fprintf (listing, "Syntax error at line% d:% s \ n", lineno, message);
fprintf (listing, "Current token:% s", tokenString);
printToken (yychar);
Error = TRUE;
return 0;
}
/ * The parsing function that interacts with the main function * /
TreeNode * parse (void)
{iniLexer ();
yyparse ();
return savedTree;
}

Compilation is also very simple:

# The makefile for interpreter based on Flex and Bison
CC = gcc
FLAG = -w

# clean names no file, so it must run even if a file called "clean" exists.
.PHONY: clean

interpreter: main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
	$(CC) $(FLAG) -o ../bin\&test/interpreter main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
main.o: main.c parse.h analyze.h
	$(CC) $(FLAG) -c main.c
cmm.tab.o:cmm.tab.c parse.h
	$(CC) $(FLAG) -c cmm.tab.c
cmm.tab.c:cmm.y
	bison -d cmm.y
# bison -d writes cmm.tab.h together with cmm.tab.c. Declaring that here
# lets make build the header before cmm.lex.o needs it (also under -j).
cmm.tab.h: cmm.tab.c ;
cmm.lex.o:cmm.lex.c cmm.tab.h
	$(CC) $(FLAG) -c cmm.lex.c
cmm.lex.c:cmm.l
	flex -o cmm.lex.c cmm.l
symtab.o:symtab.c symtab.h globals.h
	$(CC) $(FLAG) -c symtab.c
analyze.o:analyze.c globals.h symtab.h analyze.h
	$(CC) $(FLAG) -c analyze.c
util.o:util.c
	$(CC) $(FLAG) -c util.c

clean:
	-rm *.o

Finally, to test it, you can run ./interpreter [file_name] directly.

If you are interested, you can download this project and study it to learn how to use flex & bison to quickly implement a scripting language.


Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If anything on this page is confusing, please send us an email; we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.