Parser類用於對C代碼進行文法分析並構造文法樹。
/// <summary>
/// Parser class parses simple C source code to build the syntax tree.
/// </summary>
public class Parser
{
    // Categories reported through ParsingErrorEventArg.Error.
    public enum ParsingErrorType
    {
        Warning,
        UnknownError,
        UndefinedVariable,
        UnexpectedKeyword,
        TokenExpected,
        SyntaxError,
        TypeError,
        FormatError,
        FunctionBodyUndefined
    };

    // Payload type of the OnParsing event.
    public class ParsingEventArg
    {
        public Context Context;
    }

    // Payload type of the OnParsingWarning event.
    public class ParsingWarningEventArg
    {
        public Context Context;
        public Context.LocationInfo Location;
        public string Description;
    }

    // Payload type of the OnParsingError event.
    // NOTE(review): Continue presumably lets a handler decide whether parsing goes on;
    // the code that reads it is not part of this excerpt - confirm.
    public class ParsingErrorEventArg
    {
        public Context Context;
        public Context.LocationInfo Location;
        public ParsingErrorType Error;
        public string Description;
        public bool Continue;
    }

    // Notification events; the code that raises them is not part of this excerpt.
    public event EventHandler<ParsingEventArg> OnParsing;
    public event EventHandler<ParsingWarningEventArg> OnParsingWarning;
    public event EventHandler<ParsingErrorEventArg> OnParsingError;

    ///////////////////////////////
    // Private member variables
    ///////////////////////////////
    private SourceCode m_sourceCode = null; // set by Parse(SourceCode)
    private Word m_lastWord = null;
    private Expression.ExpressionNode m_lastExpNode = null;
    private Dictionary<char, char> m_escapeCharDict = new Dictionary<char, char>();
    private int m_errorCount = 0;
    private int m_warningCount = 0;

    // Text of the source most recently passed to Parse(SourceCode).
    // Dereferences m_sourceCode, which is null until Parse(SourceCode) has been called.
    public String Source
    {
        get { return m_sourceCode.Text; }
    }

    // True when the error counter is greater than zero.
    public bool HasError
    {
        get { return m_errorCount > 0; }
    }

    // True when the warning counter is greater than zero.
    public bool HasWarning
    {
        get { return m_warningCount > 0; }
    }

    public int ErrorCount
    {
        get { return m_errorCount; }
    }

    public int WarningCount
    {
        get { return m_warningCount; }
    }

    // NOTE(review): presumably upper limits for reported errors/warnings; the code that
    // reads these fields is not part of this excerpt - confirm.
    public int MaxError = 0;
    public int MaxWarning = 65535;

    /// <summary>
    /// Parses <paramref name="src"/> into a freshly created <see cref="Context"/>.
    /// </summary>
    /// <param name="src">Source code to parse; also remembered in m_sourceCode.</param>
    /// <returns>
    /// The populated context when both parsing and ValidateContextReference succeed;
    /// otherwise null.
    /// </returns>
    public Context Parse(SourceCode src)
    {
        m_sourceCode = src;

        Context ctx = new Context();

        if (Parse(ctx, src))
        {
            // After parsing completes, check for unused variables or functions and for
            // uses of functions that were declared but never defined.
            if (ValidateContextReference(ctx))
                return ctx;
        }

        return null;
    }

    // (The remainder of the class is elided in this excerpt.)
    ...
文法分析的主要方法定義如下:
/// <summary>
/// Parses <paramref name="src"/> statement by statement and appends the resulting
/// syntax nodes to <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Context that receives the parsed children; its Location is updated.</param>
/// <param name="src">Source fragment to split into statements and parse.</param>
/// <returns>
/// true when all statements were processed; false as soon as a sub-parser returns false
/// or NotifyError returns false.
/// </returns>
private bool Parse(Context ctx, SourceCode src)
{
    bool res = true;

    ctx.Location.FirstLine = src.AbsoluteLine;
    ctx.Location.FirstPos = src.AbsolutePos;

    foreach (SourceCode stmt in src.SplitStatement()) // process one statement at a time
    {
        try
        {
            // A preceding do...while whose Condition is still null is waiting for its
            // "while" part, which arrives as the next statement.
            ControlFlow.DoWhileLoop pendingLoop = ctx.Children.Count > 0
                ? ctx.Children.Last() as ControlFlow.DoWhileLoop
                : null;

            if (pendingLoop != null && pendingLoop.Condition == null)
            {
                Word wordOfWhile = GetWord(stmt);

                if (wordOfWhile.Text == "while")
                {
                    res = ParseControl_While(ctx, stmt, new Context.LocationInfo()
                    {
                        FirstLine = wordOfWhile.AbsoluteLine,
                        FirstPos = wordOfWhile.AbsoluteStartPos
                    });

                    if (!res)
                    {
                        return false;
                    }

                    continue;
                }

                if (!NotifyError(ctx, wordOfWhile.Location, ParsingErrorType.SyntaxError, "\"while\" is expected."))
                {
                    return false;
                }

                // The statement is now handled as an ordinary one. GetWord above already
                // consumed its first word, so rewind before the sub-parsers read it again.
                stmt.ResetPos();
            }

            // Ordinal comparison: these are syntax characters, not linguistic text.
            // NOTE(review): a fragment ending with neither ';' nor '}' is not handled
            // here and leaves res unchanged.
            if (stmt.Text.EndsWith(";", System.StringComparison.Ordinal)) // plain statement
            {
                res = ParseStatement(ctx, stmt);
            }
            else if (stmt.Text.EndsWith("}", System.StringComparison.Ordinal)) // function body or block
            {
                if (stmt.Text.StartsWith("{", System.StringComparison.Ordinal)) // anonymous block
                {
                    // Strip the surrounding braces and parse the inside recursively.
                    SourceCode blockSrc = new SourceCode()
                    {
                        LineOffset = stmt.AbsoluteLine,
                        PosOffset = stmt.AbsolutePos,
                        Text = stmt.Text.Substring(1, stmt.Text.Length - 2)
                    };

                    Block block = new Block()
                    {
                        Name = Context.GetAnonymousName("block"),
                        Location = new Context.LocationInfo()
                        {
                            FirstLine = stmt.AbsoluteLine,
                            FirstPos = stmt.AbsolutePos
                        }
                    };

                    ctx.AddChild(block);

                    res = Parse(block, blockSrc);

                    block.Location.LastLine = stmt.AbsoluteLine;
                    block.Location.LastPos = stmt.AbsolutePos;
                }
                else
                {
                    // Either a control structure with a braced body or a function definition.
                    Word wordOfControlFlow = GetWord(stmt);

                    if (Context.IsControlFlow(wordOfControlFlow.Text))
                    {
                        res = ParseControlFlow(ctx, stmt, wordOfControlFlow);
                    }
                    else
                    {
                        stmt.ResetPos();
                        res = ParseFunction(ctx, stmt, wordOfControlFlow.Location);
                    }
                }
            }
        }
        catch (ParseException pe)
        {
            // Report the failure against the enclosing context's location.
            if (!NotifyError(ctx, ctx.Location, ParsingErrorType.SyntaxError, pe.Message))
            {
                return false;
            }
        }

        if (!res)
        {
            return false;
        }
    } // foreach

    ctx.Location.LastLine = src.AbsoluteLine;
    ctx.Location.LastPos = src.AbsolutePos;

    return true;
}
語句處理分三種情況：聲明、控制結構及運算式。
/// <summary>
/// Dispatches a single statement to the declaration, control-flow or expression
/// parser, depending on the statement's first word.
/// </summary>
/// <param name="ctx">Context the statement is parsed into.</param>
/// <param name="src">Source of the statement.</param>
/// <returns>The result of whichever sub-parser was selected.</returns>
private bool ParseStatement(Context ctx, SourceCode src)
{
    Word leadingWord = GetWord(src);

    // Starts with a data type: variable or function declaration.
    if (Context.IsDataType(leadingWord.Text))
    {
        return ParseDeclare(ctx, src, leadingWord);
    }

    // Starts with a control-flow keyword.
    if (Context.IsControlFlow(leadingWord.Text))
    {
        return ParseControlFlow(ctx, src, leadingWord);
    }

    // Anything else is an expression; rewind so the expression parser reads the
    // statement from its beginning.
    src.ResetPos();
    return ParseExpression(ctx, src, leadingWord.Location);
}
函數解析的實現如下:
/// <summary>
/// Parses a function definition: the header (everything before the first '{') is
/// parsed as a statement, then the body between the braces is parsed recursively.
/// </summary>
/// <param name="ctx">Context that receives the function definition.</param>
/// <param name="src">Source of the whole definition, header and braced body.</param>
/// <param name="loc">Location of the first word of the definition.</param>
/// <returns>
/// true when header and body were parsed; false when either fails, when no '{' is
/// found, or when the header did not produce a function definition.
/// </returns>
private bool ParseFunction(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    // Advance to the '{' that opens the function body.
    while (!src.Eof && src.CurrentChar != '{')
    {
        src.NextChar();
    }

    // No '{' at all: report it instead of slicing the text with invalid bounds below.
    if (src.Eof)
    {
        NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "'{' is expected.");
        return false;
    }

    // Location of the header.
    Context.LocationInfo headerLoc = loc;
    headerLoc.LastPos = src.AbsolutePos - 1;

    // Header text: everything before the '{'.
    SourceCode funcHeader = new SourceCode()
    {
        PosOffset = loc.FirstPos,
        LineOffset = loc.FirstLine,
        Text = src.Text.Substring(0, src.Column)
    };

    // Parse the header. On success a FunctionDefine is expected to have been appended
    // to the end of the current context.
    if (!ParseStatement(ctx, funcHeader))
    {
        return false;
    }

    src.NextChar(); // skip '{'

    // Body text: everything after the '{' except the final character (the closing '}').
    SourceCode bodyStmt = new SourceCode()
    {
        PosOffset = src.AbsolutePos,
        LineOffset = src.AbsoluteLine,
        Text = src.Text.Substring(src.Column, src.Text.Length - src.Column - 1)
    };

    // The header must have produced a function definition; anything else (or nothing)
    // is reported as a syntax error rather than dereferencing null.
    Function.FunctionDefine funcDef = ctx.Children.Count > 0
        ? ctx.Children.Last() as Function.FunctionDefine
        : null;

    if (funcDef == null)
    {
        NotifyError(ctx, headerLoc, ParsingErrorType.SyntaxError, "Function definition is expected.");
        return false;
    }

    funcDef.AddChild(new Block()
    {
        Name = Context.GetAnonymousName("block")
    });

    // Parse the body recursively.
    if (Parse(funcDef.Body, bodyStmt))
    {
        funcDef.Location = headerLoc;
        return true;
    }

    return false;
}
控制結構的解析如下:
/// <summary>
/// Routes a control-flow statement to the parser for its keyword and, on success,
/// calls NotifyParsing with the last child of <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Context the statement is parsed into.</param>
/// <param name="src">Source of the statement.</param>
/// <param name="wordOfControlFlow">The keyword that starts the statement.</param>
/// <returns>true when the keyword-specific parser succeeded; otherwise false.</returns>
private bool ParseControlFlow(Context ctx, SourceCode src, Word wordOfControlFlow)
{
    bool parsed;

    switch (wordOfControlFlow.Text)
    {
        case "if":
            parsed = ParseControl_If(ctx, src, wordOfControlFlow.Location);
            break;

        case "else":
            parsed = ParseControl_Else(ctx, src, wordOfControlFlow.Location);
            break;

        case "for":
            parsed = ParseControl_For(ctx, src, wordOfControlFlow.Location);
            break;

        case "do":
            parsed = ParseControl_DoWhile(ctx, src, wordOfControlFlow.Location);
            break;

        case "while":
            parsed = ParseControl_While(ctx, src, wordOfControlFlow.Location);
            break;

        case "switch":
            parsed = ParseControl_Switch(ctx, src, wordOfControlFlow.Location);
            break;

        case "continue":
            parsed = ParseControl_Continue(ctx, src, wordOfControlFlow.Location);
            break;

        case "break":
            parsed = ParseControl_Break(ctx, src, wordOfControlFlow.Location);
            break;

        case "return":
            parsed = ParseControl_Return(ctx, src, wordOfControlFlow.Location);
            break;

        default:
            // Unsupported control flow: the error is reported and the method fails
            // whatever NotifyError returns.
            NotifyError(ctx, wordOfControlFlow.Location, ParsingErrorType.SyntaxError, "Unsupported keyword.");
            return false;
    }

    if (!parsed)
    {
        return false;
    }

    NotifyParsing(ctx.Children.Last());
    return true;
}
以if/else為例,說明控制結構的解析過程。
先看if部分:
/// <summary>
/// Parses the "if (condition) then-part" portion of an if statement and appends an
/// IfThen node to <paramref name="ctx"/>. An "else" part is handled separately.
/// </summary>
/// <param name="ctx">Context that receives the IfThen node.</param>
/// <param name="src">Source positioned just after the "if" keyword.</param>
/// <param name="loc">Location of the "if" keyword.</param>
/// <returns>
/// false when NotifyError stops parsing or the condition fails to parse; otherwise the
/// result of parsing the then-part.
/// </returns>
private bool ParseControl_If(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    src.SkipSpace();

    // The condition must start with '('.
    if (src.CurrentChar != '('
        && !NotifyError(ctx, GetLocation(loc.FirstLine, loc.FirstPos, src.AbsoluteLine, src.AbsolutePos), ParsingErrorType.SyntaxError, "'(' is expected."))
    {
        return false;
    }

    // Extract the condition expression.
    SourceCode condition = GetParenthesisCode(src);

    ControlFlow.IfThen stxIf = new ControlFlow.IfThen()
    {
        Location = new Context.LocationInfo()
        {
            FirstLine = loc.FirstLine,
            FirstPos = loc.FirstPos
        }
    };

    ctx.AddChild(stxIf);

    // Parse the condition expression.
    if (!ParseExpression(stxIf, condition, ref stxIf.Condition))
    {
        return false;
    }

    // Then-part: either a braced block or a single statement.
    src.SkipSpace();

    Block thenBlock = new Block();
    stxIf.AddChild(thenBlock);

    SourceCode thenCode;  // what gets parsed into the then-block
    SourceCode endSource; // whose position marks the end of the if statement

    if (src.CurrentChar == '{')
    {
        thenCode = GetBlockCode(src);
        endSource = src;
    }
    else
    {
        thenCode = new SourceCode()
        {
            PosOffset = src.AbsolutePos,
            LineOffset = src.AbsoluteLine,
            Text = src.Text.Substring(src.Column)
        };
        endSource = thenCode;
    }

    bool thenParsed = Parse(thenBlock, thenCode);

    stxIf.Location.LastLine = endSource.AbsoluteLine;
    stxIf.Location.LastPos = endSource.AbsolutePos;

    return thenParsed;
}
再看else部分:
/// <summary>
/// Parses an "else" part and attaches it to the if statement that precedes it in
/// <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Context whose last child should be the matching if statement.</param>
/// <param name="src">Source positioned just after the "else" keyword.</param>
/// <param name="loc">Location of the "else" keyword.</param>
/// <returns>
/// false when there is no if statement to attach to or NotifyError stops parsing;
/// otherwise the result of parsing the else-part.
/// </returns>
private bool ParseControl_Else(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    // "else" cannot stand alone. With no preceding statement at all there is nothing
    // to attach to, and Last() would throw on the empty child list.
    if (ctx.Children.Count == 0)
    {
        NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "\"else\" should not appear here.");
        return false;
    }

    // The preceding statement must be an if. Matching against the last child also
    // pairs each else with the nearest if.
    if (!(ctx.Children.Last() is ControlFlow.IfThen))
    {
        if (!NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "\"else\" should not appear here."))
        {
            return false;
        }
    }

    // The previous statement may be a chain of if/then: more than two children means
    // an else part is already present, so descend to the last child.
    // NOTE(review): the descent stops at the first node with two or fewer children;
    // confirm this reaches the innermost if for "else if ... else" chains.
    Context lastStx = ctx.Children.Last();

    while (lastStx.Children.Count > 2)
    {
        lastStx = lastStx.Children.Last();
    }

    // Without a matching if there is nothing to attach the else part to, so fail here
    // rather than dereference null below.
    ControlFlow.IfThen stxIf = lastStx as ControlFlow.IfThen;

    if (stxIf == null)
    {
        NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "Can't find matched \"if\".");
        return false;
    }

    src.SkipSpace();

    bool res = false;

    Block elseBlock = new Block();
    stxIf.AddChild(elseBlock);

    if (src.CurrentChar == '{')
    {
        // Braced block.
        SourceCode code = GetBlockCode(src);

        res = Parse(elseBlock, code);

        lastStx.Location.LastLine = src.AbsoluteLine;
        lastStx.Location.LastPos = src.AbsolutePos;
    }
    else
    {
        // Single statement.
        SourceCode stmt = new SourceCode()
        {
            PosOffset = src.AbsolutePos,
            LineOffset = src.AbsoluteLine,
            Text = src.Text.Substring(src.Column)
        };

        res = Parse(elseBlock, stmt);

        lastStx.Location.LastLine = stmt.AbsoluteLine;
        lastStx.Location.LastPos = stmt.AbsolutePos;
    }

    return res;
}