This article covers only the plots commonly used in data analysis; complex graphics are outside the scope of this discussion. A handful of plot types is enough to meet the needs of the data analysis process. As for reporting materials or other high-quality graphics, I will write a separate article about the simple use of ggplot2.
Python's main plotting tool is matplotlib, which is simple rather than complex to use.
There are two ways to draw with matplotlib:
1. Draw with matplotlib directly, passing a DataFrame or Series as the data argument.
2. Draw through pandas: DataFrame and Series objects have plot methods of their own.
1. Drawing with matplotlib directly. Anyone who has used MATLAB will find this familiar.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from numpy.random import randn, rand
import matplotlib as mpl
from matplotlib.pyplot import savefig
mpl.rcParams['font.sans-serif'] = ['SimHei']  # Specify the default font
mpl.rcParams['axes.unicode_minus'] = False  # Fix the minus sign '-' being displayed as a box in saved images
# 1. First create a canvas
fig = plt.figure()
# 2. Then create a grid of subplots
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
# 3. View the canvas
fig
# 4. Draw a curve on its own
plt.plot(randn(50).cumsum(), 'k--')
# 5. Draw on the subplot grid just created
# 5.1. Histogram
ax1.hist(randn(100), bins=20, color='k', alpha=0.3)
# 5.2. Scatter plot
ax2.scatter(np.arange(30), np.arange(30) + 3 * randn(30))
# 5.3. Line plot (default)
ax3.plot(randn(50).cumsum(), 'k--')
# 6. Display the finished figure
fig
# or
plt.show()
The code above first creates a canvas and then creates the grid of subplots. Some people find that tedious, so there is the following shortcut.
# Shortcut function to create a grid of subplots
# sharex, sharey share the x and y axes, that is, the tick marks are the same
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(randn(500), bins=50,
                        color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)
# Graphics properties
1. color: color
1.1 r: red
1.2 b: blue
1.3 g: green
1.4 y: yellow
2. Data marker: marker
2.1 o: circle
2.2 . : dot
2.3 D: diamond
3. Line style: linestyle
3.1 No argument: the default plot is drawn
3.2 --: dashed line
3.3 -: solid line
4. Transparency
alpha
5. Size
6. Grid lines
plt.grid(True, color='g', linestyle='-', linewidth='2')
# Region fill
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(0, 5 * np.pi, 1000)
y1 = np.sin(x)
y2 = np.sin(2 * x)
plt.plot(x, y1)
plt.plot(x, y2)
# Fill
plt.fill(x, y1, 'b', alpha=0.3)
plt.fill(x, y2, 'r', alpha=0.3)
# Fill only the area between the two curves
fig = plt.figure()
ax = plt.gca()
ax.plot(x, y1, x, y2, color='black')
ax.fill_between(x, y1, y2, facecolor='blue')
ax.fill_between(x, y1, y2, where=y1 > y2, facecolor='yellow', interpolate=True)
ax.fill_between(x, y1, y2, where=y2 > y1, facecolor='green', interpolate=True)
plt.show()
# Style
plt.style.use('ggplot')
# Print the styles supported by plt
print(plt.style.available)
# Histogram and two-dimensional histogram
# Using the plt method
import numpy as np
import matplotlib.pyplot as plt
# normed: normalization; specifies whether to show relative frequencies or raw counts
mu = 100
sigma = 20
x = mu + sigma * np.random.randn(2000)
plt.hist(x, bins=10, color='red', normed=True)
plt.hist(x, bins=50, color='green', normed=False)
plt.show()
# Two-dimensional histogram
x = np.random.randn(1000) + 2
y = np.random.randn(1000) + 3
plt.hist2d(x, y, bins=40)
plt.show()
# Integral plot
# In fact, the integral plot is realized by drawing a polygon
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon  # polygon
import numpy as np
def func(x):
    return -(x - 2) * (x - 8) + 40
x = np.linspace(0, 10)
y = func(x)
fig, ax = plt.subplots()
plt.plot(x, y, 'r', linewidth=2)
a, b = 2, 9
ax.set_xticks([a, b])
# ax.set_yticks([])
ax.set_xticklabels(['$a$', '$b$'])
# Vertex positions of the polygon covering the integral region
ix = np.linspace(a, b)
iy = func(ix)
ixy = zip(ix, iy)
verts = [(a, 0)] + list(ixy) + [(b, 0)]
poly = Polygon(verts, facecolor='0.9', edgecolor='0.1')
ax.add_patch(poly)
plt.figtext(0.9, 0.05, '$x$')
plt.figtext(0.1, 0.9, '$y$')
x_match = (a + b) * 0.4
y_match = 30
plt.text(x_match, y_match,
         r'$\int_a^b (-(x-2)*(x-8)+40)dx$', fontsize=20)
plt.show()
# Formula
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot()
ax.set_xlim(1, 7)
ax.set_ylim(1, 5)
ax.text(2, 4, r"$ \alpha_i \pi \lambda \omega $",
        size=25)
plt.show()
plt.plot(randn(30).cumsum(), 'b.--')
# Set the title, axis ticks, axis labels, and tick labels
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum())
ticks = ax.set_xticks([0, 250, 500, 750, 1000])  # x-axis ticks
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                            rotation=30, fontsize='small')  # x-axis tick labels
ax.set_title('My first matplotlib plot')  # title
ax.set_xlabel('Stages')  # x-axis label
fig
# Add a legend
# This is also the way to draw multiple lines in the same figure
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')
# Location of the legend
ax.legend(loc='best')
# ax.legend(loc=1)
# 1, 2, 3, 4 represent the four corners of the figure
# Save the picture
fig.savefig('pd_fig.png')
# Annotations, and drawing on a subplot
from datetime import datetime
data = pd.read_csv('spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']
data.columns
data.head()
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
spx.plot(ax=ax, style='k--')
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]
for date, label in crisis_data:
    ax.annotate(label,
                xy=(date, spx.asof(date) + 50),
                xytext=(date, spx.asof(date) + 200),
                arrowprops=dict(facecolor='black'),
                verticalalignment='top')
# Axis range
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 800])
ax.set_title('Important dates in 2008-2009 financial crisis')
fig
Saving pictures in bulk
If there is only one picture, there is nothing more to say, but if you want to draw a plot for each dimension and then save each picture, you need to save them in a batch.
The following is pseudo-code for batch saving:
for _ in all_need_plot:
    fig = plt.figure()
    ax = fig.add_subplot()
    sub_data[years].plot(ax=ax, style='o', title='%s monthly Sales Trend Chart' % lev3)
    file = r'e:\Apparel forecast Sales Trend-%s.jpg' % lev3
    savefig(file)
    time.sleep(0.5)  # Note the pause here; without it you will run into problems
    plt.close()  # Finally, remember to close the figure handle before drawing the next picture
As mentioned earlier, you can draw with matplotlib directly, and Series and DataFrame also wrap plotting functions of their own. Here are some common plots.
Plotting function name |
Purpose |
Module |
plot() |
Line chart, point plot |
Matplotlib and Pandas |
pie() |
Pie chart |
Matplotlib and Pandas |
hist() |
Histogram, bar chart |
Matplotlib and Pandas |
boxplot() |
Box plot |
Pandas |
plot(logy=True) |
Plot with a logarithmic y axis |
Pandas |
plot(yerr=error) |
Error bar chart |
Pandas |
Simple drawing with pandas
# 1. matplotlib method
import matplotlib.pyplot as plt
plt.plot(x, y, s)
# s is a format option that covers color, style, and plot type; the specific parameters are as described earlier.
# 2. pandas method
d.plot(kind='graphics type')
# d is a DataFrame or Series; by default the index is used as the horizontal axis,
# and each column of data is plotted automatically as the vertical axis, that is, one graph is drawn per column.
# kind=
# line: line chart
# bar: bar chart
# barh: horizontal bar chart
# hist: histogram
# box: box plot
# kde: density plot
# area: area chart
# pie: pie chart
# In addition to the kind parameter, the parameters of plt.plot() are also accepted.
Common plots
# 1. Line chart (default)
s = Series(randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot(style='ko--')
# For a DataFrame, each column is drawn as a line
df = DataFrame(randn(10, 4).cumsum(0), columns=['A', 'B', 'C', 'D'], index=np.arange(0, 100, 10))
df.plot()
# 2. Bar chart
# A bar chart draws one bar per value; if there are several values, several bars are drawn. In other words, a bar chart needs the height of each bar to be given.
# Note that this is not the same as a histogram:
# a histogram does the counting itself and then draws; if the bins are fine enough, it approximates the density curve.
fig, axes = plt.subplots(2, 1)
data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot(kind='bar', ax=axes[0])
data.plot(kind='bar', ax=axes[1])
fig
df = DataFrame(np.random.rand(6, 4),
               index=['one', 'two', 'three', 'four', 'five', 'six'],
               columns=pd.Index(['A', 'B', 'C', 'D'], name='genus'))
df
# Draw a group of bars for each row; there are as many groups as there are rows
df.plot(kind='bar')
# 2.2 Stacked bar chart
df.plot(kind='bar', stacked=True, alpha=0.5)
tips = pd.read_csv('tips.csv')
tips.head()
party_counts = pd.crosstab(tips.size, tips.day)
party_counts
party_pcts = party_counts.div(party_counts.sum(1), axis=0)
party_pcts
party_pcts.plot(kind='bar', stacked=True)
# 3. Histogram and density plot
# Note the difference from the bar chart above: a histogram automatically counts the values in each interval and draws the heights
tips['tip_pct'] = tips['tip'] / tips['total_bill']
tips['tip_pct'].hist(bins=50)
# 4. Density curve
tips['tip_pct'].plot(kind='kde')
# Two plots together
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = Series(np.concatenate([comp1, comp2]))
values
values.hist(bins=100, alpha=0.3, color='k', normed=True)
values.plot(kind='kde', style='k--')
# 5. Scatter plot
macro = pd.read_csv('macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()
trans_data[-5:]
trans_data.tail()
plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Change in log %s vs log %s' % ('m1', 'unemp'))
plt.show()
# 5.2. Scatter plot matrix
pd.scatter_matrix(trans_data, diagonal='kde', color='k', alpha=0.3)
For plotting in Python, the best choice is still seaborn: it draws beautiful, colorful statistical graphics, and its grouping, faceting, and statistical features are comparable to ggplot.