使用Python提取中文字元

來源:互聯網
上載者:User

標籤:

# -*- coding: utf-8 -*-
#################################################
# Purpose: internationalization test helper. Extracts the Chinese characters
#          found in application design packages and writes a report.
# yuebai 20160328
# Pipeline: unzip --- filter --- tidy paths --- extract Chinese --- write report
#################################################
import os
import shutil
import sys
import re
import zipfile
import glob

workPath = "C:\\users\\yuebai\\Desktop\\國際化測試包"
reportPath = "C:\\users\\yuebai\\Desktop\\國際化輸出報告"

# File names used inside workPath / reportPath.
_LOG_NAME = "run.log"
_REPORT_NAME = "chineseTxt.txt"

# One or more consecutive characters in the U+4E00..U+9FA5 range.
_CHINESE_RE = re.compile(r"[\u4e00-\u9fa5]+")


# Logging helper
def logInfo(info):
    """Append one ``[Info] <info>`` line to ``run.log`` inside ``workPath``.

    The log is written as UTF-8 so that any path or message can be recorded
    regardless of the platform's default encoding.
    """
    with open(os.path.join(workPath, _LOG_NAME), "a", encoding="UTF-8") as log:
        log.write("[Info] %s\n" % info)


# Zip extraction helper
def extractZip(fileName, extraPath):
    """Extract every member of the zip archive ``fileName`` into ``extraPath``.

    Raises whatever ``zipfile.ZipFile`` raises for a missing or invalid
    archive. A log line is written once extraction has finished.
    """
    with zipfile.ZipFile(fileName, "r") as archive:
        archive.extractall(extraPath)
    logInfo("%s檔案解壓完成" % fileName)


# Worker: find Chinese characters
def getChinese(fileFullPath):
    """Append every line of ``fileFullPath`` containing Chinese to the report.

    The file is read as UTF-8. For each matching line, a header naming the
    file and then the line itself are appended to ``chineseTxt.txt`` inside
    ``reportPath`` (the directory is created if missing).

    Raises:
        UnicodeDecodeError: if the file is not valid UTF-8.
        OSError: if either file cannot be opened.
    """
    os.makedirs(reportPath, exist_ok=True)
    report_file = os.path.join(reportPath, _REPORT_NAME)

    with open(fileFullPath, "r", encoding="UTF-8") as src, \
            open(report_file, "a", encoding="UTF-8") as report:
        for line in src:
            if _CHINESE_RE.search(line):
                report.write("發現中文字元(╯‘ - ‘)╯︵ ┻━┻ ,檔案路徑為%s\n" % fileFullPath)
                # `line` keeps its own newline, so this leaves a blank
                # separator line after each hit (same as the original output).
                report.write("%s\n" % line)

    logInfo("尋找完成,輸出報告路徑:%s" % reportPath)


# Worker: list everything under a path
def listAny(workPath):
    """Return the entry names directly under ``workPath``.

    If the path does not exist, an error is printed and an empty list is
    returned, so callers that iterate over the result simply do nothing.
    """
    if not os.path.exists(workPath):
        print("Error,no such directory %s,plz check" % workPath)
        return []
    return os.listdir(workPath)


def _isZipName(name):
    """Return True when ``name`` ends with ``.zip`` (case-insensitive)."""
    return name.lower().endswith(".zip")


def _removeNonZipFiles():
    """Delete the regular files in ``workPath`` that are not zip packages.

    Directories are left alone (``os.remove`` cannot delete them) and the
    run log is kept so that logging stays continuous across the run.
    """
    notZipList = [
        path
        for path in glob.glob(os.path.join(workPath, "*"))
        if os.path.isfile(path)
        and not _isZipName(path)
        and os.path.basename(path) != _LOG_NAME
    ]
    logInfo("notZipList =%s,prepare to delete" % notZipList)
    for path in notZipList:
        os.remove(path)
    logInfo("刪除非zip包完成")


def _mergeExtendIntoPlans(extraPath):
    """Move files from ``Plans/Extend`` up into ``Plans``; return the Plans path.

    The Plans path is returned whether or not an ``Extend`` directory exists,
    so the caller always has a defined directory to scan.
    """
    tmpPlanPath = os.path.join(extraPath, "Plans")
    tmpExtendPath = os.path.join(tmpPlanPath, "Extend")

    if os.path.isdir(tmpExtendPath):
        for t in os.listdir(tmpExtendPath):
            # No trailing separator here: with one, isfile() never matches.
            t_FullPath = os.path.join(tmpExtendPath, t)
            if os.path.isfile(t_FullPath):
                shutil.move(t_FullPath, tmpPlanPath)
    return tmpPlanPath


def _scanPlans(tmpPlanPath):
    """Run ``getChinese`` on every regular file directly under ``tmpPlanPath``."""
    if not os.path.isdir(tmpPlanPath):
        logInfo("no Plans directory at %s, nothing to check" % tmpPlanPath)
        return

    for y in os.listdir(tmpPlanPath):
        y_fullPath = os.path.join(tmpPlanPath, y)
        if not os.path.isfile(y_fullPath):
            # e.g. the (now emptied) Extend sub-directory
            continue

        logInfo("開始檢查檔案%s,檢查結果路徑:%s" % (y_fullPath, reportPath))
        try:
            getChinese(y_fullPath)
        except UnicodeDecodeError:
            # A non-UTF-8 file should not abort the scan of the other files.
            logInfo("skipped %s: not valid UTF-8" % y_fullPath)


def main():
    """Run the pipeline: filter, unzip, tidy paths, extract Chinese, report."""
    if not os.path.isdir(workPath):
        print("Error,no such directory %s,plz check" % workPath)
        return

    # Delete everything that is not a zip package.
    _removeNonZipFiles()

    # Only real zip packages are processed; the run log and folders left by
    # an earlier run also live in workPath and must not be opened as zips.
    zipList = [name for name in listAny(workPath) if _isZipName(name)]

    for zipPackage in zipList:
        # Strip the extension to get the package name.
        zipName = os.path.splitext(zipPackage)[0]
        # Folder named after the package, used as the extraction target.
        extraPath = os.path.join(workPath, zipName)
        os.makedirs(extraPath, exist_ok=True)
        logInfo("構造解壓路徑完成,extraPath =%s" % extraPath)

        # Absolute path of the archive to extract.
        extraFilePath = os.path.join(workPath, zipPackage)

        # Extract the package, then delete the source zip.
        extractZip(extraFilePath, extraPath)
        os.remove(extraFilePath)

        # Gather the flow files under Plans/Extend into Plans, then scan.
        tmpPlanPath = _mergeExtendIntoPlans(extraPath)
        _scanPlans(tmpPlanPath)

    print("提取完成,結果路徑:%s" % reportPath)


if __name__ == "__main__":
    main()

 

使用Python提取中文字元

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.