Pandas has two core data structures: Series and DataFrame.
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from numpy import nan as NA
from pandas import DataFrame, Series
%matplotlib inline
Series is essentially a one-dimensional array
# Series
# A Series is like a cross between an array and a dictionary: it is array-like, but its index labels may be non-numeric.
Elements can be accessed directly through the index.
obj = Series([4, 7, -5, 3])
obj
0    4
1    7
2   -5
3    3
dtype: int64
obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])  # the index labels can be specified explicitly
print(obj2)
print(obj2.index)
d    4
b    7
a   -5
c    3
dtype: int64
Index(['d', 'b', 'a', 'c'], dtype='object')
print(obj2[1])
print(obj2['c'])
7
3
obj = Series([4, 7, -5, 3])
obj
0    4
1    7
2   -5
3    3
dtype: int64
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = Series(sdata)  # initializes the Series with a dictionary, but the order is not guaranteed
obj3
Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64
obj3 + obj4  # values at matching index labels are added; labels present in only one Series yield NaN
obj = Series([4, 7, -3, 2])
obj.sort_values()  # sort by value
2   -3
3    2
0    4
1    7
dtype: int64
index = ['d', 'c', 'a', 'b']
obj = Series([4, 7, -3, 2], index=index)
index = sorted(index)
obj = obj.reindex(index)
obj
a   -3
b    2
c    7
d    4
dtype: int64
# DataFrame
# A DataFrame is essentially a two-dimensional array: rows are located with `index` and columns are located with `columns`.
#          col_1  col_2  ...  col_n
# index_1  x11    x12    ...  x1n
# index_2  x21    x22    ...  x2n
# ...
# index_m  xm1    xm2    ...  xmn
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}  # each key is a column name, and the corresponding list holds that column's values
frame = DataFrame(data)
frame
Note: a DataFrame is organized by rows and columns, conceptually df[row, col] (in code, label-based access is written `df.loc[row, col]`).
print(frame - s)  # subtracts s from every row; the values of s are matched to the frame's columns by index label
frame = DataFrame(np.random.randn(4, 3),
                  columns=list('bde'),
                  index=['Utah', 'Ohio', 'Texas', 'Oregon'])
print(frame)
# np.abs(frame)  # element-wise functions
               b         d         e
Utah    0.062497  0.288348 -0.808569
Ohio    0.349030  0.088106  0.930447
Texas  -0.422867 -0.349967 -1.472045
Oregon  0.664530 -0.415166  0.494318
# Sorting
frame = DataFrame(np.arange(8).reshape((2, 4)),
                  index=['three', 'one'],
                  columns=['d', 'a', 'b', 'c'])
frame.sort_index()  # sort by row index
frame.sort_index(axis=1, ascending=False)  # sort by column name in descending order
frame = DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
frame
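# Data merging. First review the SQL concept of a join.
# outer: keeps rows matched on both sides, left-only rows, and right-only rows
# inner: keeps only rows matched on both sides
# left:  keeps rows matched on both sides and left-only rows
# right: keeps rows matched on both sides and right-only rows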
# Other topics you need to know
# 1. Pivot and melt
# 2. Value replacement
# 3. Data binning (cutting)
# 4. Permutations, combinations, and random sampling
# Homework: compute per-state voting statistics from the U.S. general election data (data: data/2016-us-ge-by-county.csv)
# Required results: the total number of votes in each state, and the respective vote counts for Hillary Clinton and Trump
Python (Part 8: table processing with Pandas)