Python編程:pandas入門,Python pandas入門
總算找到時間學習了下pandas,先學習一部分,後面繼續補充
# Introductory pandas walkthrough: build a Series, load a CSV file into a
# DataFrame, inspect it, select rows/columns, do simple arithmetic, and sort.
# The expected output shown in comments assumes a "data.csv" with the columns
# id, name, age, score and ten rows (id 1..10, name "tom", age 12, score 98).
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Create a Series and let pandas build the default integer index.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)
# 0    1.0
# 1    3.0
# 2    5.0
# 3    NaN
# 4    6.0
# 5    8.0
# dtype: float64

# --- Reading data ---------------------------------------------------------
# Read a CSV file. By default the first line holds the column names and the
# data starts on the second line (pass header=None if the data starts on the
# first line).
data = pd.read_csv("data.csv")
print(data)
#    id name  age  score
# 0   1  tom   12     98
# 1   2  tom   12     98
# 2   3  tom   12     98
# 3   4  tom   12     98
# 4   5  tom   12     98
# 5   6  tom   12     98
# 6   7  tom   12     98
# 7   8  tom   12     98
# 8   9  tom   12     98
# 9  10  tom   12     98

# Show the first rows (5 by default).
print(data.head())
#    id name  age  score
# 0   1  tom   12     98
# 1   2  tom   12     98
# 2   3  tom   12     98
# 3   4  tom   12     98
# 4   5  tom   12     98

# Show the last rows.
print(data.tail())
#    id name  age  score
# 5   6  tom   12     98
# 6   7  tom   12     98
# 7   8  tom   12     98
# 8   9  tom   12     98
# 9  10  tom   12     98

# Show the column names.
print(data.columns)
# Index(['id', 'name', 'age', 'score'], dtype='object')

# Show the row index.
print(data.index)
# RangeIndex(start=0, stop=10, step=1)

# Show the shape as (rows, columns).
print(data.shape)
# (10, 4)

# --- Indexing and arithmetic ----------------------------------------------
# Column names here are strings (object dtype); the row index defaults to
# integers.

# Select rows by label with .loc -- the end label is INCLUDED.
print(data.loc[3:6])
#    id name  age  score
# 3   4  tom   12     98
# 4   5  tom   12     98
# 5   6  tom   12     98
# 6   7  tom   12     98

# Select rows with a plain slice on the frame -- like a Python list slice,
# the end position is EXCLUDED.
print(data[3:6])
#    id name  age  score
# 3   4  tom   12     98
# 4   5  tom   12     98
# 5   6  tom   12     98

# Select several columns by passing a list of column names.
columns = ["name", "age"]
print(data[columns])
#   name  age
# 0  tom   12
# 1  tom   12
# 2  tom   12
# 3  tom   12
# 4  tom   12
# 5  tom   12
# 6  tom   12
# 7  tom   12
# 8  tom   12
# 9  tom   12

# Convert the column names to a list / NumPy array.
print(data.columns.tolist())
# ['id', 'name', 'age', 'score']

print(data.columns.values)
# ['id' 'name' 'age' 'score']

print(data.columns.values.dtype)
# object

# Slice rows and columns together. A single .loc[rows, cols] call is used
# instead of chaining .loc[...][...], which needs two lookups.
print(data.loc[3:6, ["name", "age"]])
#   name  age
# 3  tom   12
# 4  tom   12
# 5  tom   12
# 6  tom   12

# Select a single column; the result is a Series.
print(data["name"])
# 0    tom
# 1    tom
# 2    tom
# 3    tom
# 4    tom
# 5    tom
# 6    tom
# 7    tom
# 8    tom
# 9    tom
# Name: name, dtype: object

# Maximum value of a column.
print(data["id"].max())
# 10

# Element-wise arithmetic: divide every id by the number of rows.
# NOTE(review): despite its name, age_average holds id / row_count, not an
# average of the "age" column; the name is kept for backward compatibility.
num = data.shape[0]
age_average = data["id"] / num
print(age_average.head())
# 0    0.1
# 1    0.2
# 2    0.3
# 3    0.4
# 4    0.5
# Name: id, dtype: float64

# Sort by "id" in descending order. sort_values returns a NEW frame when
# inplace=False (the default), so the result must be captured; the original
# script discarded it and then printed the unsorted data.
sorted_data = data.sort_values("id", ascending=False)
print(sorted_data.head())
#    id name  age  score
# 9  10  tom   12     98
# 8   9  tom   12     98
# 7   8  tom   12     98
# 6   7  tom   12     98
# 5   6  tom   12     98
查看評論