A Python splitter script is handy when a long article has to be broken into smaller pieces, because a very long text file is awkward to read and handle. The code below shows how a Python script can split a TXT novel into multiple HTML files.
# Convert a TXT novel into multiple HTML files
# @author: GreatGhoul
# @email: greatghoul@gmail.com
# @blog: http://greatghoul.javaeye.com
import os
import re
# Path of the TXT book to convert.
# NOTE(review): the file name appears to have been lost from this listing
# (only the drive and the extension remain) - set the real path before running.
source_path = 'f:\\.txt'

# (directory, file name) of the book; the output folder is created beside it.
path_pieces = os.path.split(source_path)

# Book title: the file name with everything from the first '.' onwards removed.
novel_title = re.sub(r'(\..*$)|($)', '', path_pieces[1])

# Output folder "<title>_html" in the same directory as the book.
# os.path.join is used instead of plain concatenation so that a book living
# in a sub-directory does not get its folder name glued onto the directory
# name without a separator.
target_path = os.path.join(path_pieces[0], '%s_html' % novel_title)

# Regex for the section title: a line that contains the word "volume"
# followed by whitespace starts a new section.
# NOTE(review): the literal "volume" looks like a translated heading marker -
# adjust it to whatever the headings in the book actually use.
section_re = re.compile(r'^\s*.+volume\s+.*$')

# Page header shared by every generated file. It takes two '%s' values:
# the <title> text and the <h2> heading. The page is closed (</body></html>)
# by whoever writes the rest of the file.
# NOTE(review): the font-family value was empty in this listing, so the
# declaration is omitted; anything that followed the <h2> heading is also
# missing from the listing and has not been reconstructed.
section_head = '''
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK"/>
<title>%s</title>
</head>
<body style="font-size: 16px; margin: 0; padding: 20px; background: #FAFAD2; color: #2B4B86; text-align: center;">
<h2>%s</h2>
'''
# Escape xml/html
def escape_xml(code):
    """Return *code* with HTML-special characters and whitespace escaped.

    '&', '<' and '>' become their character entities, and every whitespace
    character (tabs and line breaks included) becomes '&nbsp;'.

    Args:
        code: the raw text of one line.

    Returns:
        The escaped text, safe to place inside an HTML element.
    """
    # '&' has to be handled first: escaping it after '<' and '>' would turn
    # the '&' of the freshly written '&lt;' / '&gt;' into '&amp;'.
    text = code.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    # NOTE(review): as listed, a tab gets the same replacement as any other
    # whitespace; the listing may have dropped repeated entities - confirm.
    text = text.replace('\t', '&nbsp;')
    return re.sub(r'\s', '&nbsp;', text)
def _section_file(index):
    """Return the path of the HTML file that holds section number *index*."""
    return os.path.join(target_path, '%d.html' % index)


def _nav_footer(index, has_next):
    """Build the navigation footer and closing tags for section *index*.

    Args:
        index: zero-based number of the section the footer belongs to.
        has_next: True when a section file with number index + 1 follows.

    Returns:
        A list of HTML fragments, in output order.
    """
    links = ['<p>']
    # Section 0 has no predecessor, so it gets no "previous" link.
    if index > 0:
        links.append('<a href="%d.html">previous article</a>&nbsp;|&nbsp;'
                     % (index - 1))
    links.append('<a href="index.html">directory</a>&nbsp;|&nbsp;')
    if has_next:
        links.append('<a href="%d.html">next</a>&nbsp;|&nbsp;' % (index + 1))
    links.append('<a name="bottom" href="#">back to the top</a></p>')
    links.append('</body></html>')
    return links


def _write_section(index, title, body, has_next):
    """Write one complete section page: header, paragraphs, footer."""
    with open(_section_file(index), 'w') as output:
        output.write(section_head % (title, title))
        output.writelines(body)
        output.writelines(_nav_footer(index, has_next))


# Entry of the script
def main():
    """Split the book at ``source_path`` into HTML pages under ``target_path``.

    Everything before the first line matching ``section_re`` goes into
    ``0.html`` (the preface). Each matching line starts a new numbered page
    whose heading is that line. Blank lines are skipped and every other line
    becomes one escaped ``<p>`` paragraph. Finally ``index.html`` is written
    with one link per page.
    """
    # Create the output folder
    if not os.path.exists(target_path):
        os.mkdir(target_path)

    sec_count = 0
    title = '%s preface' % novel_title
    body = []
    # The index entry of the preface page is labelled with the book title.
    idx_cache = ['<li><a href="%d.html">%s</a></li>' % (sec_count, novel_title)]

    # The with-block guarantees the source file is closed even on error.
    with open(source_path, 'r') as source:
        for line in source:
            if line.strip() == '':
                continue
            # Is it a chapter's title?
            if section_re.match(line):
                # A new title ends the current page, so a next page exists.
                _write_section(sec_count, title, body, True)
                # NOTE(review): the listing removes all whitespace from the
                # title; confirm it was not meant to collapse it to a space.
                title = re.sub(r'\s+', '', line)
                print('converting %s...' % title)
                sec_count += 1
                body = []
                idx_cache.append('<li><a href="%d.html">%s</a></li>'
                                 % (sec_count, title))
            else:
                body.append('<p style="text-align: left;">%s</p>'
                            % escape_xml(line))

    # Write the rest lines: the last page has no successor.
    _write_section(sec_count, title, body, False)

    # Write the menu
    menu_head = '%s directory' % novel_title
    with open(os.path.join(target_path, 'index.html'), 'w') as output:
        output.write(section_head % (menu_head, menu_head))
        output.write('<ul style="text-align: left">')
        output.writelines(idx_cache)
        output.write('</ul></body></html>')

    print('completed. %d chapter(s) in total.' % sec_count)
# Run the conversion only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
That concludes this introduction to the Python splitter; I hope you find it useful.