Batch-converting Word documents to HTML with Python and publishing the HTML content to a website (tags: python, word)
This example shows how to use Python to batch-convert Word documents to HTML and then publish the resulting HTML content to a website. It is shared here for your reference. The implementation is as follows:
# coding=utf-8
#
# Batch-convert Word documents to HTML and publish the result to a website.
#
# wordsToHtml() drives Kingsoft WPS through COM to save every .doc/.docx file
# under a folder as HTML.  html_add_to_db() then copies each generated page
# (and the local images it references) into the web server's static directory
# and inserts an <iframe> wrapper for the page into MySQL.
__author__ = 'zhm'

import os
import random
import re
import time

import MySQLdb
from win32com import client as wc

# Encoding used to decode byte-string file names and paths.
FILE_NAME_ENCODING = "gbk"

# COM ProgID of Kingsoft WPS: KWPS is used by the preview ("preemptive")
# builds, WPS by the official builds.
WPS_PROG_ID = 'kwps.application'

# Word's WdSaveFormat value for HTML output (wdFormatHTML).
WD_FORMAT_HTML = 8

WORD_SUFFIXES = ("doc", "docx")
HTML_SUFFIXES = ("htm", "html")

# d:/files is the static directory configured for the web server.
HTML_TARGET_DIR = 'd:/files/htmls/'
IMAGE_TARGET_DIR = 'd:/files/images/whzx/'

DB_CONFIG = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': 'root',
    'db': 'test',
    'charset': 'utf8',
}

INSERT_ARTICLE_SQL = "insert into common_article (title, content) values (%s, %s)"

NO_SUFFIX_MESSAGE = '*********************** ERROR: No suffix name is obtained! '

# NOTE(review): the pattern literal was lost from the source this script was
# recovered from.  It is reconstructed from how its matches are used: the
# single capture group must be the value of an <img> tag's src attribute.
# Confirm against the original script.  The pattern is a bytes pattern because
# the page is read in binary mode.
IMG_SRC_PATTERN = re.compile(
    br"""<img\s.*?\s?src\s*=\s*['|"]?([^\s'"]+).*?>""", re.I)

# The page is wrapped in an iframe whose height is adjusted to its content.
_IFRAME_HEAD = '''
<script type="text/javascript" language="javascript">
    function iFrameHeight() {
        var ifm = document.getElementById("iframepage");
        var subWeb = document.frames ? document.frames["iframepage"].document : ifm.contentDocument;
        if (ifm != null && subWeb != null) {
            ifm.height = subWeb.body.scrollHeight;
        }
    }
</script>
<iframe src='''
_IFRAME_TAIL = ''' marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()"></iframe>
'''


def _to_text(value, encoding=FILE_NAME_ENCODING):
    """Return *value* as text, decoding it with *encoding* if it is bytes."""
    if isinstance(value, bytes):
        return value.decode(encoding)
    return value


def _split_suffix(file_name):
    """Split *file_name* at its last dot into ``(stem, suffix)``.

    The suffix is lower-cased so that ``.DOC`` and ``.doc`` are treated alike.
    It is ``None`` when the name contains no dot at all.
    """
    stem, dot, suffix = file_name.rpartition(".")
    if not dot:
        return file_name, None
    return stem, suffix.lower()


def _ensure_dir(directory):
    """Create *directory* (and its parents) if it does not exist yet."""
    if not os.path.exists(directory):
        os.makedirs(directory)


def _unique_target(directory, extension):
    """Return a path in *directory* that does not exist yet.

    The file name is the current time in milliseconds followed by a random
    number and *extension*; a new name is drawn until an unused one is found.
    """
    while True:
        stamp = int(time.time() * 1000)
        name = str(stamp) + str(random.randint(100, 10000)) + extension
        candidate = os.path.join(directory, name)
        if not os.path.exists(candidate):
            return candidate


def _web_path(local_path):
    """Return *local_path* without its drive letter, using forward slashes."""
    return os.path.splitdrive(local_path)[1].replace("\\", "/")


def _publish_image(folder, src):
    """Copy the image *src* (relative to *folder*) into IMAGE_TARGET_DIR.

    Returns the web path of the copy, or ``None`` when *src* does not name an
    existing local file, in which case the reference is left untouched.
    """
    source = os.path.join(folder, src)
    if not os.path.isfile(source):
        return None
    _ensure_dir(IMAGE_TARGET_DIR)
    target = _unique_target(IMAGE_TARGET_DIR, '.png')
    with open(source, 'rb') as reader, open(target, 'wb') as writer:
        writer.write(reader.read())
    return _web_path(target)


def _publish_html(cur, folder, html_name, title):
    """Publish one HTML file and insert its iframe wrapper via cursor *cur*.

    The page's local images are copied to IMAGE_TARGET_DIR and every
    occurrence of their original ``src`` text in the page is replaced with the
    new web path.  The rewritten page is stored under HTML_TARGET_DIR.
    """
    with open(os.path.join(folder, html_name), 'rb') as page:
        content = page.read()

    handled = set()
    for src in IMG_SRC_PATTERN.findall(content):
        if src in handled:
            # Already copied and replaced everywhere in the page.
            continue
        handled.add(src)
        image_web_path = _publish_image(folder, _to_text(src))
        if image_web_path is not None:
            # The page content is bytes, so the replacement must be bytes too.
            content = content.replace(src, image_web_path.encode("ascii"))

    _ensure_dir(HTML_TARGET_DIR)
    target_file = _unique_target(HTML_TARGET_DIR, '.html')
    with open(target_file, 'wb') as out:
        out.write(content)

    iframe_html = _IFRAME_HEAD + _web_path(target_file) + _IFRAME_TAIL
    try:
        cur.execute(INSERT_ARTICLE_SQL, (title, iframe_html))
    except MySQLdb.Error as exc:
        # Best effort: report the failure and carry on with the next file.
        print("Error: unable to insert data: %s" % (exc,))


def wordsToHtml(dir):
    """Convert every .doc/.docx file below *dir* into an .html file.

    Each HTML file is written next to its source document.  Only Word
    documents are opened in WPS, every opened document is closed again, and
    WPS is shut down even if a conversion fails.
    """
    word = wc.Dispatch(WPS_PROG_ID)
    try:
        for path, _subdirs, files in os.walk(dir):
            # NOTE(review): absolute paths are used on the assumption that the
            # automation server may resolve relative paths against a different
            # working directory than this script's - confirm.
            folder = os.path.abspath(_to_text(path))
            for wordFile in files:
                word_name = _to_text(wordFile)
                stem, suffix = _split_suffix(word_name)
                if suffix is None:
                    print(NO_SUFFIX_MESSAGE)
                    continue
                if suffix not in WORD_SUFFIXES:
                    continue
                htmlFullName = os.path.join(folder, stem + ".html")
                doc = word.Documents.Open(os.path.join(folder, word_name))
                try:
                    print(u' generate an html file: ' + htmlFullName)
                    doc.SaveAs(htmlFullName, WD_FORMAT_HTML)
                finally:
                    doc.Close()
    finally:
        word.Quit()
    print("")
    print("Finished!")


def html_add_to_db(dir):
    """Publish every .htm/.html file below *dir* and record it in MySQL.

    For each page one row is inserted into ``common_article``: the title is
    the file name without its suffix, the content is an iframe pointing at the
    published copy.  All inserts are committed together once the walk has
    finished.  The cursor and the connection are always closed.
    """
    conn = MySQLdb.connect(**DB_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for path, _subdirs, files in os.walk(dir):
                folder = _to_text(path)
                for htmlFile in files:
                    html_name = _to_text(htmlFile)
                    title, suffix = _split_suffix(html_name)
                    if suffix is None:
                        print(NO_SUFFIX_MESSAGE)
                        continue
                    if suffix not in HTML_SUFFIXES:
                        continue
                    _publish_html(cur, folder, html_name, title)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Close the database connection.
        conn.close()


if __name__ == '__main__':
    wordsToHtml('d:/word')
    html_add_to_db('d:/word')
I hope this article helps you with your Python programming.