SQL parsing implemented using flex and Bison

Source: Internet
Author: User

At my teacher's request, I have recently been implementing stored procedures for OceanBase, which does not support stored procedures as of OceanBase 0.4. The main steps of the implementation are:

1. Syntax parsing

2. Lexical analysis

3. The concrete implementation of the steps in the syntax tree

Let's start with parsing, which is done mainly with flex (a lexical-analyzer generator) and Bison (a parser generator); together they parse the stored-procedure statement that the user entered.

Here is exactly how the analysis of the SQL statement is implemented:

1. First create a Lex file

/*
 * Lexer for the stored-procedure dialect (CREATE PROCEDURE ... BEGIN ... END).
 *
 * Token codes and the YYSTYPE union (strval / subtok) come from the
 * Bison-generated header prosql.tab.hpp.  Every token that carries text stores
 * a heap copy of it in yylval.strval.
 *
 * NOTE(review): this file was reconstructed from a whitespace- and
 * case-mangled listing; identifier casing (globalInputText,
 * readInputForLexer), the back-quoted name rules, the spacing inside message
 * strings, and the subtok value for "<=>" should be confirmed against the
 * original project.
 */
%option noyywrap nodefault yylineno case-insensitive

%{
#include "prosql.tab.hpp"
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <malloc.h>

/* Start condition saved when a block comment opens, restored when it closes. */
int oldstate;

extern "C" int yylex();
extern "C" void yyerror(const char *s, ...);

extern char globalInputText[10000];
extern int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead);

/* Read scanner input through readInputForLexer() instead of from yyin. */
#undef YY_INPUT
#define YY_INPUT(b, r, s) readInputForLexer(b, &r, s)
%}

%x COMMENT

%%

  /* keywords (matched case-insensitively; listed before the NAME rule so they win ties) */
create      { return CREATE; }
procedure   { return PROCEDURE; }
sql         { return SQL; }
declare     { return DECLARE; }
set         { return SET; }
begin       { return BEGINT; }
end         { return END; }
int         { return INT; }
varchar     { return VARCHAR; }
date        { return DATE; }
time        { return TIME; }
double      { return DOUBLE; }
if          { return IF; }
then        { return THEN; }
else        { return ELSE; }
endif       { return ENDIF; }
for         { return FOR; }
when        { return WHEN; }
while       { return WHILE; }

  /* integer literal, kept as text */
[0-9]+      { yylval.strval = strdup(yytext); return INTNUM; }

  /* floating-point literal, kept as text; a bare leading dot needs at least one digit */
[0-9]+"."[0-9]* |
"."[0-9]+ |
[0-9]+E[-+]?[0-9]+ |
[0-9]+"."[0-9]*E[-+]?[0-9]+ |
"."[0-9]+E[-+]?[0-9]+   { yylval.strval = strdup(yytext); return APPROXNUM; }

  /* booleans are reported as the strings "1" / "0", heap-allocated like every other strval */
true        { yylval.strval = strdup("1"); return BOOL; }
false       { yylval.strval = strdup("0"); return BOOL; }

  /* quoted strings; the quotes and any escapes are kept verbatim in the token text */
'(\\.|''|[^'\n])*' |
\"(\\.|\"\"|[^"\n])*\"  { yylval.strval = strdup(yytext); return STRING; }

'(\\.|[^'\n])*$         { yyerror("unterminated string %s", yytext); }
\"(\\.|[^"\n])*$        { yyerror("unterminated string %s", yytext); }

  /* hex and bit strings are returned as STRING with their original spelling */
X'[0-9A-F]+' |
0X[0-9A-F]+             { yylval.strval = strdup(yytext); return STRING; }

0B[01]+ |
B'[01]+'                { yylval.strval = strdup(yytext); return STRING; }

  /* single-character operators and punctuation are their own token codes */
[-+&~|^/%*(),.;!]       { return yytext[0]; }

"&&"        { return ANDOP; }
"||"        { return OR; }

  /* comparison operators share one token; subtok tells them apart */
"<"         { yylval.subtok = 1; return COMPARISON; }
">"         { yylval.subtok = 2; return COMPARISON; }
"!=" |
"<>"        { yylval.subtok = 3; return COMPARISON; }
"="         { yylval.subtok = 4; return COMPARISON; }
"<="        { yylval.subtok = 5; return COMPARISON; }
">="        { yylval.subtok = 6; return COMPARISON; }
"<=>"       { yylval.subtok = 12; return COMPARISON; }

"<<"        { yylval.subtok = 1; return SHIFT; }
">>"        { yylval.subtok = 2; return SHIFT; }

  /* plain identifiers */
[A-Za-z][A-Za-z0-9_]*   { yylval.strval = strdup(yytext); return NAME; }

  /* back-quoted identifiers: copy without the opening quote, then cut off the closing one */
`[^`/\\.\n]+`           { yylval.strval = strdup(yytext + 1);
                          yylval.strval[yyleng - 2] = 0;
                          return NAME; }

`[^`\n]*$               { yyerror("Unterminated quoted name %s", yytext); }

  /* user variables: the leading '@' is dropped, any quotes around the name are kept */
@[0-9a-z_.$]+ |
@\"[^"\n]+\" |
@`[^`\n]+` |
@'[^'\n]+'              { yylval.strval = strdup(yytext + 1); return USERVAR; }

@\"[^"\n]*$             { yyerror("unterminated quoted user variable %s", yytext); }
@`[^`\n]*$              { yyerror("unterminated quoted user variable %s", yytext); }
@'[^'\n]*$              { yyerror("unterminated quoted user variable %s", yytext); }

":="        { return ASSIGN; }

  /* comments: '#' and '-- ' run to end of line; block comments use the COMMENT state */
#.*             ;
"--"[ \t].*     ;

"/*"                { oldstate = YY_START; BEGIN COMMENT; }
<COMMENT>"*/"       { BEGIN oldstate; }
<COMMENT>.|\n       ;
<COMMENT><<EOF>>    { yyerror("unclosed comment"); }

  /* everything else */
[ \t\n]     /* white space */
.           { yyerror("mystery character '%c'", *yytext); }

%%


This part recognizes each token that matches one of our custom regular expressions.

Next comes the grammatical recognition of the tokens (the Bison file):

%{
/*
 * Bison grammar for the stored-procedure dialect.
 *
 * The "parse tree" is a single text string: every rule concatenates the text
 * of its children (plus a marker such as "(create)" or "(declare)") with
 * mystrcat(), and the finished string is published in parsetreeroot.
 *
 * NOTE(review): this file was reconstructed from a whitespace- and
 * case-mangled listing.  Identifier casing, the positional references
 * ($1, $2, ...), and the spaces inside string literals ("CREATE PROCEDURE ",
 * "declare ", "SET ", " ") should be confirmed against the original project.
 */
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <malloc.h>

/* Result of the most recent successful parse; read through GetSQL(). */
char *parsetreeroot = NULL;

extern "C" int yylex();
extern "C" int yyparse();
extern "C" void yyerror(const char *s, ...);

/* Text to be scanned, and how much of it readInputForLexer() has handed out. */
char globalInputText[10000];
int globalReadOffset;

int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead);

/* subtok value the lexer assigns to the "=" COMPARISON token. */
enum { CMP_EQ = 4 };

/*
 * Return a newly malloc'ed string holding s1 followed by s2.
 * A NULL argument is treated as the empty string.  Neither input is modified
 * or freed.  Exits the process if memory cannot be allocated.
 */
char *mystrcat(const char *s1, const char *s2)
{
    const char *left = s1 ? s1 : "";
    const char *right = s2 ? s2 : "";
    size_t left_len = strlen(left);
    size_t right_len = strlen(right);
    char *joined = (char *)malloc(left_len + right_len + 1);

    if (joined == NULL) {
        fprintf(stderr, "Error: out of memory\n");
        exit(EXIT_FAILURE);
    }
    memcpy(joined, left, left_len);
    memcpy(joined + left_len, right, right_len + 1); /* +1 copies the terminator */
    return joined;
}

/*
 * Like mystrcat(), but releases `owned` afterwards.  Used only for
 * intermediate buffers created inside an action; the $n operand strings are
 * never freed here because this file does not control how the lexer
 * allocated them.
 */
static char *mystrcat_own(char *owned, const char *tail)
{
    char *joined = mystrcat(owned, tail);
    free(owned);
    return joined;
}

/* Heap copy of a constant, so every <strval> is allocated the same way. */
static char *heap_copy(const char *text)
{
    return mystrcat(text, "");
}
%}

%locations

%union {
    int intval;
    double floatval;
    char *strval;
    int subtok;
}

%token <strval> NAME
%token <strval> STRING
%token <strval> INTNUM
%token <strval> BOOL
%token <strval> APPROXNUM
%token <strval> USERVAR

%type <strval> stmt_root create_stmt para_list definition data_type pro_block pro_parameters declare_list set_list
%type <strval> assign_var pro_body pro_stmt_list sql_stmt expr

%right ASSIGN
%left OR
%left XOR
%left ANDOP
%left NOT '!'
%left BETWEEN
%left <subtok> COMPARISON /* = <> < > <= >= <=> */
%left '|'
%left '&'
%left <subtok> SHIFT /* << >> */
%left '+' '-'
%left '*' '/' '%' MOD
%left '^'

%token CREATE
%token PROCEDURE
%token PRONAME
%token DECLARE
%token SET
%token BEGINT
%token END
%token SQL
%token INT
%token VARCHAR
%token DATE
%token TIME
%token DOUBLE
%token IF
%token NOT
%token EXISTS
%token THEN
%token ELSE
%token ENDIF
%token FOR
%token WHEN
%token WHILE

%start stmt_root

%%

/* Whole input: procedure header followed by its BEGIN ... END block. */
stmt_root: create_stmt pro_block
        {
            $$ = mystrcat($1, $2);
            parsetreeroot = $$;
        }
    ;

create_stmt: CREATE PROCEDURE NAME '(' para_list ')'
        {
            char *temp = mystrcat("CREATE PROCEDURE ", $3);
            temp = mystrcat_own(temp, "(");
            temp = mystrcat_own(temp, $5);
            $$ = mystrcat_own(temp, ")(create)\n");
        }
    ;

/*
opt_if_not_exists:  { $$ = 0; }
    | IF NOT EXISTS { $$ = 1; }
    ;
*/

para_list: definition
        { $$ = $1; }
    | definition ',' para_list
        {
            char *temp = mystrcat($1, ",");
            $$ = mystrcat_own(temp, $3);
        }
    ;

definition: USERVAR data_type
        {
            char *temp = mystrcat($1, " ");
            $$ = mystrcat_own(temp, $2);
        }
    ;

/* NOTE(review): the VARCHAR length ($3) is parsed but not carried into the output. */
data_type: DATE
        { $$ = heap_copy("DATE"); }
    | TIME
        { $$ = heap_copy("TIME"); }
    | VARCHAR '(' INTNUM ')'
        { $$ = heap_copy("VARCHAR"); }
    | INT
        { $$ = heap_copy("INT"); }
    | DOUBLE
        { $$ = heap_copy("DOUBLE"); }
    ;

pro_block: BEGINT pro_parameters pro_body END
        {
            char *temp = mystrcat("begin\n", $2);
            temp = mystrcat_own(temp, " ");
            temp = mystrcat_own(temp, $3);
            $$ = mystrcat_own(temp, "end");
        }
    ;

pro_parameters: declare_list ';'
        { $$ = mystrcat($1, ";(declare)\n"); }
    | pro_parameters declare_list ';'
        {
            char *temp = mystrcat($1, $2);
            $$ = mystrcat_own(temp, ";(declare)\n");
        }
    | pro_parameters set_list ';'
        {
            char *temp = mystrcat($1, $2);
            $$ = mystrcat_own(temp, ";(set)\n");
        }
    ;

/*
 * The empty alternatives below get an explicit empty string: without an
 * action, $$ would be indeterminate and later be passed to mystrcat().
 * NOTE(review): these empty alternatives look likely to make Bison report
 * conflicts (declare_list and set_list can both be empty before ';');
 * check with `bison -v`.
 */
declare_list: /* empty */
        { $$ = heap_copy(""); }
    | DECLARE definition
        { $$ = mystrcat("declare ", $2); }
    | declare_list ',' definition
        {
            char *temp = mystrcat($1, ",");
            $$ = mystrcat_own(temp, $3);
        }
    ;

set_list: /* empty */
        { $$ = heap_copy(""); }
    | SET assign_var
        { $$ = mystrcat("SET ", $2); }
    | set_list ',' assign_var
        {
            char *temp = mystrcat($1, ",");
            $$ = mystrcat_own(temp, $3);
        }
    ;

/* "=" reaches the parser as a COMPARISON token; reject the other comparison operators. */
assign_var: USERVAR COMPARISON expr
        {
            if ($2 != CMP_EQ) {
                yyerror("expected '=' in SET assignment");
                YYERROR;
            }
            char *temp = mystrcat($1, "=");
            $$ = mystrcat_own(temp, $3);
        }
    ;

expr: NAME
        { $$ = $1; }
    | STRING
        { $$ = $1; }
    | INTNUM
        { $$ = $1; }
    | APPROXNUM
        { $$ = $1; }
    | BOOL
        { $$ = $1; }
    ;

pro_body: pro_stmt_list
        { $$ = $1; }
    ;

pro_stmt_list: sql_stmt
        { $$ = $1; }
    | pro_stmt_list sql_stmt
        { $$ = mystrcat($1, $2); }
    ;

sql_stmt: /* empty */
        { $$ = heap_copy(""); }
    | SQL NAME ';'
        { $$ = mystrcat($2, ";(sql)\n"); }
    ;

%%

/*
int main(int argc, char *argv[])
{
    yyparse();
}
*/

/*
 * Input callback used by the lexer's YY_INPUT macro.
 *
 * Copies up to maxBytesToRead bytes of globalInputText, starting at
 * globalReadOffset, into buffer; stores the number of bytes copied in
 * *numBytesRead (0 once the text is exhausted) and advances globalReadOffset
 * by that amount.  Always returns 0.
 */
int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead)
{
    int bytesRemaining = (int)strlen(globalInputText) - globalReadOffset;
    int numBytesToRead = maxBytesToRead;

    if (numBytesToRead > bytesRemaining) {
        numBytesToRead = bytesRemaining;
    }
    /* A stale offset past the end of the text must not yield a negative count. */
    if (numBytesToRead < 0) {
        numBytesToRead = 0;
    }
    if (numBytesToRead > 0) {
        memcpy(buffer, globalInputText + globalReadOffset, (size_t)numBytesToRead);
    }

    *numBytesRead = numBytesToRead;
    globalReadOffset += numBytesToRead;
    return 0;
}

/*
 * Error reporter called by the parser and the lexer.  `s` is a printf-style
 * format; the lexer passes arguments for it, so they are formatted here
 * rather than printing the raw format string.
 */
void yyerror(const char *s, ...)
{
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "Error: ");
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Variant of yyerror() that prefixes the message with the lexer's current line number. */
void zzerror(const char *s, ...)
{
    extern int yylineno;
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "%d: error: ", yylineno);
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Returns 1: there is no further input after the current buffer. */
int yywrap(void)
{
    return 1;
}

/* Returns the string built by the last successful parse, or NULL if there is none. */
char *GetSQL()
{
    return parsetreeroot;
}


This part checks the order of the tokens identified in the previous step, so that together they form a complete grammar.

These need to be debugged in a Linux environment

bison -d <file name>

flex <file name>

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.