Using Python to process data in CSV format
CSV data:
Comma-separated values (CSV; sometimes called character-separated values, because the delimiter does not have to be a comma) files store tabular data (numbers and text) in plain text. Plain text means that the file is a sequence of characters and contains no data that must be interpreted as binary numbers. A CSV file consists of any number of records separated by newline characters; each record consists of fields, and the fields are separated by some other character or string, most commonly a comma or a tab. Typically, all records have exactly the same sequence of fields.
Example record: 27,20,14,15,14,12,94,64,37,1015,1013,1009,7,5,2,21,8,35,0.00,,,
.csv files can be opened directly with Excel or similar software, where they look like an ordinary table.
The code is as follows:
import csv

file_name = 'Weather.csv'

# First pass: let the csv module parse each record, so quoted fields and
# embedded commas are handled correctly. The csv documentation asks for
# newline='' on files handed to csv.reader.
with open(file_name, 'r', encoding='utf-8', newline='') as f:
    for row in csv.reader(f):
        print(row)

print("# # #" * 10)

# Second pass, for comparison: split each raw line on commas by hand.
# Unlike csv.reader, this keeps the trailing newline in the last field and
# does not understand quoting.
with open(file_name, 'r', encoding='utf-8') as f:
    for line in f:
        print(line.split(","))
Working with Excel format data using Python
In addition to the third-party libraries xlrd, xlwt, xlutils, and pyExcelerator for working with Excel files, Python can also use the win32com and openpyxl modules to process Excel.
Installing the third-party libraries with pip:
pip install xlrd
pip install xlwt
pip install xlutils
pip install pyExcelerator
xlrd can only read Excel files; it cannot write them. xlwt can write new files, but it cannot modify an existing Excel file. If you need to modify an existing file, use the xlutils module. pyExcelerator is similar to xlwt and can also be used to generate Excel files.
To read a single-table file using xlrd:
import xlrd


def readexcel():
    """Print every row of the first sheet of ``test.xlsx``.

    Each row is printed as the list returned by ``row_values``.

    NOTE: xlrd 2.0 and later only read the legacy ``.xls`` format; reading
    an ``.xlsx`` file requires xlrd < 2.0 (or another library).
    """
    data = xlrd.open_workbook('test.xlsx')
    table = data.sheets()[0]  # open the first sheet
    # nrows is the number of rows in the sheet; print them one by one.
    for i in range(table.nrows):
        print(table.row_values(i))  # row_values returns the cells of row i


readexcel()
Read a single table complex example:
# Open a workbook.
workbook = xlrd.open_workbook('testdata.xlsx')

# Collect the names of all sheets.
worksheets = workbook.sheet_names()
print(workbook.sheets())
print('worksheets is {0}'.format(worksheets))

# Select Sheet1 by position. Index 0 is the first sheet; the same sheet can
# be fetched by name with workbook.sheet_by_name('Sheet1').
worksheet1 = workbook.sheets()[0]

# To visit every sheet instead, look each one up by name:
#     for worksheet_name in worksheets:
#         worksheet = workbook.sheet_by_name(worksheet_name)

# Traverse all rows of Sheet1.
num_rows = worksheet1.nrows
for curr_row in range(num_rows):
    row = worksheet1.row_values(curr_row)
    print('row %s is %s' % (curr_row, row))

# Traverse all columns of Sheet1.
num_cols = worksheet1.ncols
for curr_col in range(num_cols):
    col = worksheet1.col_values(curr_col)
    print('col %s is %s' % (curr_col, col))

# Traverse all cells of Sheet1, row by row.
for rown in range(num_rows):
    for coln in range(num_cols):
        cell = worksheet1.cell_value(rown, coln)
        print(cell)
To write an Excel file using XLWT:
import xlwt

# Create the workbook and its sheets (note the capital W in Workbook).
# cell_overwrite_ok=True allows a cell to be written more than once.
workbook = xlwt.Workbook()
sheet1 = workbook.add_sheet('Sheet1', cell_overwrite_ok=True)
sheet2 = workbook.add_sheet('Sheet2', cell_overwrite_ok=True)
sheet3 = workbook.add_sheet('Sheet3', cell_overwrite_ok=True)

# Write data into the sheets: write(row, column, value).
sheet1.write(0, 0, 'This should overwrite1')
sheet1.write(0, 1, 'aaaaaaaaaaaa')
sheet2.write(0, 0, 'This should Overwrite2')
# NOTE(review): the row/column of this call were missing in the source
# text; (1, 2) is a reconstruction -- confirm against the intended layout.
sheet2.write(1, 2, "BBBBBBBBBBBBB")

# ----------- Use styles -----------------------------------
# Initialise a style.
style = xlwt.XFStyle()
# Create a font for the style.
font = xlwt.Font()
font.name = 'Times New Roman'
font.bold = True
# Attach the font to the style.
style.font = font
# Write a cell using the style.
sheet3.write(0, 1, 'some bold times text', style)

# Save the Excel file; an existing file with the same name is overwritten.
workbook.save('Test2.xls')
print('Create Excel file complete!')
Excel handles hyperlinks
import codecs

import xlwt

# Build an index sheet whose first column holds one HYPERLINK formula per
# target file: row i links to "<i>.txt" and is labelled "<i>_11111".
book = xlwt.Workbook()
sheet_index = book.add_sheet('index')
for line in range(9):
    link = 'HYPERLINK("{0}.txt", "{1}_11111")'.format(line, line)
    sheet_index.write(line, 0, xlwt.Formula(link))
book.save('Simple2.xls')

# Create the files the hyperlinks point to; file i contains the digit i
# repeated ten times.
for i in range(9):
    file = str(i) + ".txt"
    with codecs.open(file, 'w') as f:
        f.write(str(i) * 10)
To modify the contents of an Excel table using Python:
Note: .xlsx files are not supported here. Working directly with .xls files causes no problems, whereas using .xlsx files is prone to errors.
import xlrd
import xlutils.copy

# Open the existing workbook for reading.
rb = xlrd.open_workbook('Aaa111.xls')
# Make a writable copy of it.
wb = xlutils.copy.copy(rb)
# Fetch the sheet from the writable copy with get_sheet(); a sheet object
# obtained through sheet_by_index() has no write() method.
ws = wb.get_sheet(0)
# Write data: write(row, column, value).
# NOTE(review): the coordinates were garbled in the source text ("Ten");
# (10, 10) is a reconstruction -- confirm.
ws.write(10, 10, 'changed!')
# Add a new sheet.
wb.add_sheet('sheetnnn2', cell_overwrite_ok=True)
# Saving under the same name overwrites the original, which is how the file
# is "modified"; content that was not touched stays the same.
wb.save('Aaa111.xls')
Python handles PDF files
Reading PDF files
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfparser import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFTextExtractionNotAllowed
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator

# Open the document; change "Test.pdf" to your own file name. The file must
# stay open while pages are processed, so all work happens inside the
# `with` block, which also guarantees the handle is closed afterwards.
with open("Test.pdf", "rb") as fp:
    # Create a parser bound to the file.
    parser = PDFParser(fp)
    # Create the PDF document object and link it with the parser.
    doc = PDFDocument()
    parser.set_document(doc)
    doc.set_parser(parser)
    # Initialise the document; no password argument is passed here.
    doc.initialize()

    # Check whether the file allows text extraction.
    if not doc.is_extractable:
        raise PDFTextExtractionNotAllowed

    # Resource manager object that stores shared resources.
    resource = PDFResourceManager()
    # Layout-analysis parameters.
    laparam = LAParams()
    # Aggregator device that collects the layout of each page.
    device = PDFPageAggregator(resource, laparams=laparam)
    # Page interpreter that renders pages onto the device.
    interpreter = PDFPageInterpreter(resource, device)

    # Walk over the pages of the document.
    for page in doc.get_pages():
        # Let the interpreter process the page ...
        interpreter.process_page(page)
        # ... then fetch the resulting layout from the aggregator.
        layout = device.get_result()
        for out in layout:
            # Only layout objects that expose get_text() carry text.
            if hasattr(out, "get_text"):
                print(out.get_text())
Convert HTML to PDF file
Installing the pdfkit module:
pip install pdfkit
Converting a web page to PDF
Convert a URL directly into a PDF file:
import pdfkit

# Convert a web page (URL) directly into a PDF file.
pdfkit.from_url('http://google.com', 'out1.pdf')

# Convert a local HTML file into a PDF file.
pdfkit.from_file('test.html', 'out2.pdf')

# Convert a string into a PDF file.
pdfkit.from_string('Hello lingxiangxiang!', 'out3.pdf')
Merging multiple PDF files
import os
from contextlib import ExitStack

import PyPDF2

# Directory that holds the PDFs to merge; the combined file is written into
# the same directory.
# NOTE(review): the source listed 'XX' but opened the files from "aming";
# a single directory is used here so both steps agree -- confirm which one
# was intended.
PDF_DIR = "aming"
OUTPUT_NAME = 'Combine.pdf'

# Collect the .pdf files in the directory. The output file is skipped so a
# second run does not merge the previous result into itself, and the list is
# sorted so the page order is deterministic (os.listdir order is arbitrary).
pdf_files = sorted(
    name for name in os.listdir(PDF_DIR)
    if name.endswith('.pdf') and name != OUTPUT_NAME
)
print(pdf_files)

pdf_writer = PyPDF2.PdfFileWriter()  # start from an empty PDF

# The readers pull page data lazily, so every input file has to stay open
# until the writer has produced the output. ExitStack closes them all
# afterwards, even if an error occurs.
with ExitStack() as stack:
    for file_name in pdf_files:
        source = stack.enter_context(
            open(os.path.join(PDF_DIR, file_name), 'rb')
        )
        pdf_reader = PyPDF2.PdfFileReader(source)
        # Copy the opened PDF page by page into the new document.
        for page_num in range(pdf_reader.numPages):
            page = pdf_reader.getPage(page_num)
            print(page)
            pdf_writer.addPage(page)

    # Write all copied pages into the combined file.
    with open(os.path.join(PDF_DIR, OUTPUT_NAME), 'wb') as pdf_output:
        pdf_writer.write(pdf_output)
Python processing pictures
PIL (Python Imaging Library) is the most commonly used image-processing library in Python. If you are on Python 2.x, you can download it from http://www.pythonware.com/products/pil/index.htm; pick the version that matches your installation.
Note: for Python 3.x, PIL has been replaced by the Pillow package (documentation: http://pillow.readthedocs.io/en/latest/). Install it with `pip3 install pillow`; when importing, still use `from PIL import Image`.
The code is as follows:
from PIL import Image

# Open the picture, print its basic properties, then display it.
image = Image.open("1.jpg")
print(image.format, image.size, image.mode)
image.show()
Results:
JPEG (1920, 1080) RGB
and open the picture and show it.
Three properties of Image:
format: Identifies the source format of the image; it is None if the image was not read from a file.
size: A tuple of two elements giving the width and height in pixels.
mode: RGB (true-colour image); other modes include L (luminance) and CMYK (pre-press image).
An introduction to Image method:
show(): Display the most recently loaded image.
open(infilename): Open a file.
save(outfilename): Save to a file.
crop((left, upper, right, lower)): Extract a rectangular region from the image. It takes a tuple of four elements (left, upper, right, lower) as its parameter; the origin (0, 0) of the coordinate system is the upper-left corner.
Geometric Processing of Image:
# `im` is an image previously opened with Image.open(...).
# NOTE(review): the size passed to resize() was lost in the source text;
# (128, 128) is an example value.
out = im.resize((128, 128))  # resize the picture
out = im.rotate(45)  # rotate 45 degrees counter-clockwise
out = im.transpose(Image.FLIP_LEFT_RIGHT)  # swap left and right
out = im.transpose(Image.FLIP_TOP_BOTTOM)  # swap top and bottom
out = im.transpose(Image.ROTATE_90)  # rotate by 90 degrees
out = im.transpose(Image.ROTATE_180)  # rotate by 180 degrees
out = im.transpose(Image.ROTATE_270)  # rotate by 270 degrees
Using a Python cutout
from PIL import Image

image = Image.open("1.jpg")
print(image.format, image.size, image.mode)
# Crop box as (left, upper, right, lower) in pixels.
box = (600, 300, 1050, 660)
region = image.crop(box)
region.save("Cutting.jpg")
The code above crops the region bounded by the corners (600, 300), (600, 660), (1050, 300), and (1050, 660) out of the picture and stores it in Cutting.jpg.
Using the Python puzzle
from PIL import Image

image = Image.open("1.jpg")
print(image.format, image.size, image.mode)
# Crop box as (left, upper, right, lower) in pixels.
box = (600, 300, 1050, 660)
cropped = image.crop(box)
# Rotate the cropped region by 180 degrees; its size is unchanged, so it
# can be pasted back into the same box.
region = cropped.transpose(Image.ROTATE_180)
image.paste(region, box)
image.show()
This crops a region of the picture (for example, the avatar), rotates it by 180 degrees, and pastes it back into its original position.
Scale a picture with Python
from PIL import Image

infile = "2.jpg"
outfile = "New2.jpg"

image = Image.open(infile)
(x, y) = image.size
# Scale to a fixed width of 300 pixels and derive the height from the
# original aspect ratio.
newx = 300
newy = int(y * newx / x)
# Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS; the old
# name was removed in Pillow 10.
out = image.resize((newx, newy), Image.LANCZOS)
out.show()
# Write the scaled picture to the output file declared above.
out.save(outfile)
Python Processing Verification Code
import random
import string
import sys
import math

from PIL import Image, ImageDraw, ImageFont, ImageFilter

# Font location; this differs between systems and versions.
font_path = 'Msyh.ttf'
# Number of characters in the generated verification code.
number = 4
# Width and height of the captcha picture, in that order.
size = (100, 30)
# Background colour; white.
bgcolor = (255, 255, 255)
# Font colour; blue.
fontcolor = (0, 0, 255)
# Interference-line colour; red.
linecolor = (255, 0, 0)
# Whether to draw interference lines.
draw_line = True
# Number of interference lines to draw.
line_number = 20
# used to randomly generate a string
def gene_text(length=None):
    """Return a random verification-code string.

    Characters are drawn without repetition from the ASCII letters and the
    digits 0-9.

    Args:
        length: Number of characters to generate. When omitted, the
            module-level ``number`` setting is used.

    Returns:
        A string of ``length`` distinct alphanumeric characters.

    Raises:
        ValueError: If ``length`` exceeds the 62 available characters
            (raised by ``random.sample``).
    """
    if length is None:
        length = number  # module-level default code length
    source = list(string.ascii_letters + string.digits)
    return ''.join(random.sample(source, length))
# used to draw interference lines
def gene_line(draw, width, height):
    """Draw one random interference line on ``draw``.

    Both end points are chosen at random with x in ``[0, width]`` and y in
    ``[0, height]``; the line is filled with the module-level ``linecolor``.

    Args:
        draw: Object exposing ``line(xy, fill=...)``; the caller in this
            file passes the ``ImageDraw`` brush of the captcha image.
        width: Upper bound for the x coordinates.
        height: Upper bound for the y coordinates.
    """
    begin = (random.randint(0, width), random.randint(0, height))
    end = (random.randint(0, width), random.randint(0, height))
    draw.line([begin, end], fill=linecolor)
# Generate Verification Code
def gene_code():
    """Generate a captcha picture and save it as ``Idencode.png``.

    The picture uses the module-level settings ``size``, ``bgcolor``,
    ``font_path``, ``fontcolor``, ``number``, ``draw_line`` and
    ``line_number``.

    Returns:
        The text drawn on the picture, so the caller can later check a
        user's answer against it.
    """
    width, height = size  # width and height of the picture
    image = Image.new('RGBA', (width, height), bgcolor)  # create the picture
    font = ImageFont.truetype(font_path, 25)  # font of the verification code
    draw = ImageDraw.Draw(image)  # create the brush
    text = gene_text()  # generate the string

    # ImageFont.getsize() was removed in Pillow 10; measure with getbbox()
    # when it is available and fall back to getsize() on older releases.
    if hasattr(font, 'getbbox'):
        left, top, right, bottom = font.getbbox(text)
        font_width, font_height = right - left, bottom - top
    else:
        font_width, font_height = font.getsize(text)

    # NOTE(review): the offset is divided by `number` (the code length), not
    # by 2, so the text is not centred and moves when `number` changes --
    # confirm whether that is intended.
    draw.text(
        ((width - font_width) / number, (height - font_height) / number),
        text,
        font=font,
        fill=fontcolor,
    )  # draw the string

    if draw_line:
        for _ in range(line_number):
            gene_line(draw, width, height)

    # An affine-transform distortion step was present in the source only as
    # a disabled comment (with garbled size arguments); it is not applied.
    image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)  # edge enhancement
    image.save('Idencode.png')  # save the captcha picture
    return text
# Generate a captcha only when the file is run as a script.
if __name__ == "__main__":
    gene_code()
Python handles csv,excel,pdf and pictures