標籤:plt col python plot 相關性 信用卡 enc res infer
# -*- coding:utf-8 -*-
# Data integration: load the three training tables and join them column-wise.
import csv

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def _load_table(path):
    """Read one comma-separated file into a DataFrame keyed by its first column.

    Replaces the removed ``pd.DataFrame.from_csv`` with the ``read_csv`` call
    that pandas documents as its equivalent: the first row is the header, the
    first column becomes the index, and date parsing of the index is enabled.
    """
    return pd.read_csv(path, header=0, sep=",", index_col=0, parse_dates=True)


# Customer information
basicInfo = _load_table("datas/basicInfo_train.csv")
# Historical repayment records
historyInfo = _load_table("datas/history_train.csv")
# Historical overdue (default) status
defaultInfo = _load_table("datas/default_train.csv")

# axis=1 places the three tables side by side; rows are aligned on the index
# (the first column of each CSV).
combineInfo = pd.concat([basicInfo, historyInfo, defaultInfo], axis=1)
# Show the first 10 rows of the combined table. print() is used so the preview
# is also visible when this runs as a script, not only in an interactive
# session where a bare expression would be echoed.
print(combineInfo[:10])
# Gender analysis: mean of the Default column for each SEX value, as a bar chart.
gender = combineInfo.groupby("SEX")["Default"].mean().reset_index()

# NOTE(review): tick labels are pinned to x = 0 and x = 1, while the bars are
# drawn at the SEX codes themselves. This assumes SEX is coded 0 = male,
# 1 = female -- confirm against the data; with any other coding the labels will
# not line up with the bars.
plt.xticks((0, 1), ("Male", "Female"))
plt.xlabel("Gender")
# The plotted height is the per-group mean of Default, not a count.
plt.ylabel("Mean Default")
plt.bar(gender.SEX, gender.Default, width=0.1, color="green")
plt.show()
# Education level vs. default: mean of the Default column for each EDUCATION
# value, drawn as a line with the EDUCATION values on the x-axis.
edu = combineInfo.groupby("EDUCATION")["Default"].mean()
plt.plot(edu)
plt.show()
# Marital status analysis: mean of the Default column for each MARRIAGE value,
# as a bar chart with one bar per MARRIAGE value.
marriage = combineInfo.groupby("MARRIAGE")["Default"].mean().reset_index()
plt.bar(marriage.MARRIAGE, marriage.Default, width=0.5, color="green")
plt.show()
Python大資料:信用卡逾期分析