# coding=utf-8
__author__ = 'Twocold'

# Text block generator.


def lines(file):
    """Yield every line of *file*, followed by one extra blank line.

    The trailing ``'\\n'`` acts as a sentinel so that a consumer which
    flushes on blank lines (see ``blocks``) also emits the final block of
    a file that does not end with an empty line.

    :param file: any iterable of text lines (an open file, a list, ...).
    """
    for line in file:
        yield line
    yield '\n'


def blocks(file):
    """Yield the blank-line separated blocks of *file* as stripped strings.

    Consecutive non-blank lines are collected and joined into one string
    (inner newlines are kept, surrounding whitespace is stripped). Runs of
    blank lines between blocks produce no output of their own.

    :param file: any iterable of text lines.
    """
    block = []
    for line in lines(file):
        if line.strip():
            block.append(line)
        elif block:
            # A blank line ends the block collected so far.
            yield ''.join(block).strip()
            block = []
# coding=utf-8
__author__ = 'twocold'


class Handler:
    """Dispatch parser events to optional, name-based methods.

    ``start(name)`` and ``end(name)`` look up ``start_<name>`` and
    ``end_<name>`` on the instance and call them when they exist.
    ``sub(name)`` returns a replacement function suitable for ``re.sub``
    that delegates to ``sub_<name>``.
    """

    def callback(self, perfix, name, *args):
        """Call the method named ``perfix + name`` with *args*, if present.

        :param perfix: method-name prefix, e.g. ``'start_'``.
        :param name: event name appended to the prefix.
        :returns: the method's return value, or ``None`` when the instance
            has no callable attribute of that name.
        """
        method = getattr(self, perfix + name, None)
        if callable(method):
            return method(*args)

    def start(self, name):
        """Invoke ``start_<name>`` if the handler defines it."""
        self.callback('start_', name)

    def end(self, name):
        """Invoke ``end_<name>`` if the handler defines it."""
        self.callback('end_', name)

    def sub(self, name):
        """Return a ``re.sub`` replacement function for the filter *name*.

        The returned function passes the match object to ``sub_<name>``.
        When that method is missing (or returns ``None``) the text of the
        first capturing group is used instead, so the pattern must define
        at least one group.
        """
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                result = match.group(1)
            return result
        return substitution


class HTMLRenderer(Handler):
    """A specific handler used for producing HTML.

    The methods of HTMLRenderer are reached through the superclass
    Handler's start(), end() and sub() methods. They implement basic
    markup as used in HTML documents.
    """

    def start_document(self):
        # NOTE(review): the source is cut off right after "print '" in this
        # method, and every later method of the class is missing with it.
        # The markup is deliberately not guessed here; restore it from the
        # original handlers module before using this renderer.
        raise NotImplementedError(
            'HTMLRenderer.start_document was truncated in the source')
# coding=utf-8
__author__ = 'twocold'


class Rule:
    """Base class for all rules.

    Subclasses set a ``type`` name and provide ``condition(block)``; the
    default ``action`` wraps the block in start/end events of that type.
    """

    def action(self, block, handler):
        """Emit start, feed and end for *block*; return True to stop
        further rule processing of this block."""
        handler.start(self.type)
        handler.feed(block)
        handler.end(self.type)
        return True


class HeadingRule(Rule):
    """A heading is a single line of at most 70 characters that does not
    end with a colon."""
    type = 'heading'

    def condition(self, block):
        """Return True when *block* qualifies as a heading."""
        return ('\n' not in block
                and len(block) <= 70
                and not block.endswith(':'))


class TitleRule(HeadingRule):
    """The title is the first block of the document, provided that it is
    a heading."""
    type = 'title'
    first = True

    def condition(self, block):
        """Return True only for the very first block, and only if it is a
        heading; every later call returns False."""
        if not self.first:
            return False
        self.first = False
        return HeadingRule.condition(self, block)


class ListItemRule(Rule):
    """A list item is a paragraph that begins with a hyphen. The hyphen is
    removed as part of the formatting."""
    type = 'listitem'

    def condition(self, block):
        """Return True when *block* starts with a hyphen."""
        return block.startswith('-')

    def action(self, block, handler):
        """Emit the item without its leading hyphen; return True to stop
        further rule processing of this block."""
        handler.start(self.type)
        handler.feed(block[1:].strip())
        handler.end(self.type)
        return True


class ListRule(ListItemRule):
    """A list begins between a block that is not a list item and a
    subsequent list item. It ends after the last consecutive list item."""
    type = 'list'
    inside = False

    def condition(self, block):
        """Always True: every block must be inspected to track list
        boundaries."""
        return True

    def action(self, block, handler):
        """Open or close the list when the block crosses a list boundary.

        Always returns False so that the remaining rules still process the
        block itself.
        """
        is_item = ListItemRule.condition(self, block)
        if not self.inside and is_item:
            handler.start(self.type)
            self.inside = True
        elif self.inside and not is_item:
            handler.end(self.type)
            self.inside = False
        return False


class ParagraphRule(Rule):
    """A paragraph is simply a block that is not covered by any of the
    other rules."""
    type = 'paragraph'

    def condition(self, block):
        """Always True: the paragraph rule is the catch-all."""
        return True
# coding=utf-8
__author__ = 'Twocold'

import sys
import re
from handlers import *
from util import *
from rules import *


class Parser:
    """Read a text file, apply rules and filters, and drive a handler."""

    def __init__(self, handler):
        """Store *handler* and start with no rules and no filters."""
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append *rule*; rules are tried in the order they were added."""
        self.rules.append(rule)

    def addFilter(self, pattern, name):
        """Register a regex filter.

        Each block is rewritten with ``re.sub(pattern, handler.sub(name),
        block)``, so the handler decides how a match of *pattern* is
        rendered.
        """
        def apply_filter(block, handler):
            return re.sub(pattern, handler.sub(name), block)
        self.filters.append(apply_filter)

    def parse(self, file):
        """Parse *file* block by block between document start/end events.

        Every block first passes through all filters, then through the
        rules in order; a rule whose action returns a true value ends the
        processing of that block.
        """
        self.handler.start('document')
        for block in blocks(file):
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            for rule in self.rules:
                if rule.condition(block):
                    last = rule.action(block, self.handler)
                    if last:
                        break
        self.handler.end('document')


class BasicTextParser(Parser):
    """A specific Parser that adds its rules and filters in the
    constructor."""

    def __init__(self, handler):
        Parser.__init__(self, handler)
        # Order matters: ListRule only tracks list boundaries and never
        # stops processing; ParagraphRule is the catch-all and goes last.
        self.addRule(ListRule())
        self.addRule(ListItemRule())
        self.addRule(TitleRule())
        self.addRule(HeadingRule())
        self.addRule(ParagraphRule())

        self.addFilter(r'\*(.+?)\*', 'emphasis')
        self.addFilter(r'(http://[\.a-zA-Z/]+)', 'url')
        self.addFilter(r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail')


if __name__ == '__main__':
    # Script entry point: render the text on stdin with the HTML handler.
    handler = HTMLRenderer()
    parser = BasicTextParser(handler)
    parser.parse(sys.stdin)
# Python Basic Tutorial, Project 1: Instant Markup (source code)