For more on risk-control modeling, big data analysis, and related topics, please follow the official (WeChat public) account "Big Data Risk Control, Bit by Bit".
This post gives a Python implementation of the KS curve. For how the method is used, please refer to the previous blog post, "R language implementation of the KS curve".
The code is as follows:
####################### plotks #########################
# NOTE: this snippet relies on the following module-level names, which are
# not part of the snippet itself:
#   import numpy as np
#   import matplotlib.pyplot as plt
#   from pandas import DataFrame, Series


def _cumulative_rates(frame, pred_ascending, bad_ascending):
    """Return (cum_good, cum_bad) arrays after sorting by pred, then bad."""
    ordered = frame.sort_values(by=['pred', 'bad'],
                                ascending=[pred_ascending, bad_ascending])
    cum_good = 1.0 * ordered['good'].cumsum() / ordered['good'].sum()
    cum_bad = 1.0 * ordered['bad'].cumsum() / ordered['bad'].sum()
    # Plain arrays: the callers combine the two orderings by position,
    # not by the original row labels.
    return cum_good.values, cum_bad.values


def _ks_table(preds, labels, n, asc):
    """Build the KS table: an all-zero origin row plus n sampled rows."""
    if asc == 1:
        pred_ascending = True
    elif asc == 0:
        pred_ascending = False
    else:
        raise ValueError(
            'asc must be 1 (preds is a score) or 0 (preds is a probability), '
            'got %r' % (asc,))
    if n < 1:
        raise ValueError('n must be a positive number of bins, got %r' % (n,))

    frame = DataFrame({'bad': labels, 'pred': preds})
    if len(frame) == 0:
        raise ValueError('preds and labels must not be empty')
    frame['good'] = 1 - frame['bad']

    # Rows with equal pred are ordered once with goods first and once with
    # bads first; averaging the two cumulative curves makes the result
    # independent of how ties happen to be ordered.
    good_a, bad_a = _cumulative_rates(frame, pred_ascending, True)
    good_b, bad_b = _cumulative_rates(frame, pred_ascending, False)

    total = len(frame)
    curve = DataFrame({
        'tile': np.arange(1, total + 1) / float(total),
        'cumsum_good': (good_a + good_b) / 2,
        'cumsum_bad': (bad_a + bad_b) / 2,
    })
    curve['ks'] = curve['cumsum_bad'] - curve['cumsum_good']

    # Quantile levels 1/n, 2/n, ..., 1.  An integer range divided by n avoids
    # the unreliable length of a float-step np.arange.
    quantiles = list(np.arange(1, n + 1) / float(n))
    positions = Series(curve.index).quantile(q=quantiles)
    positions = list(np.ceil(positions).astype(int))

    columns = ['tile', 'cumsum_good', 'cumsum_bad', 'ks']
    sampled = curve.loc[positions, columns]

    # Prepend the (0, 0, 0, 0) origin so the plotted curves start at zero.
    origin = np.zeros((1, len(columns)))
    return DataFrame(np.concatenate([origin, sampled.values], axis=0),
                     columns=columns)


def plotks(preds, labels, n, asc):
    """Compute the KS table, print the KS value and draw the KS curve.

    Parameters
    ----------
    preds : predicted values, one per sample.
    labels : observed outcome per sample; 1 for bad, 0 for good.
    n : number of quantile points sampled from the cumulative curves
        (the returned table has n + 1 rows including the origin row).
    asc : 1 when ``preds`` is a score (rows are sorted by ascending pred),
        0 when ``preds`` is a probability (rows are sorted descending).

    Returns
    -------
    DataFrame with columns ``tile`` (cumulative share of the population),
    ``cumsum_good``, ``cumsum_bad`` and ``ks`` (``cumsum_bad`` minus
    ``cumsum_good``).

    Raises
    ------
    ValueError
        If ``asc`` is neither 0 nor 1, ``n`` is smaller than 1, or the input
        is empty.

    Notes
    -----
    The reported KS value is the maximum of the signed difference over the
    sampled rows.  No check is made that both classes occur in ``labels``.
    The curves are drawn on the current matplotlib axes via ``plt``.
    """
    table = _ks_table(preds, labels, n, asc)

    peak = table['ks'].idxmax()
    ks_value = table['ks'].max()
    ks_pop = table.loc[peak, 'tile']
    print('ks_value is ' + str(np.round(ks_value, 4))
          + ' at pop = ' + str(np.round(ks_pop, 4)))

    # chart
    plt.plot(table['tile'], table['cumsum_good'], label='cum_good',
             color='blue', linestyle='-', linewidth=2)
    plt.plot(table['tile'], table['cumsum_bad'], label='cum_bad',
             color='red', linestyle='-', linewidth=2)
    plt.plot(table['tile'], table['ks'], label='ks',
             color='green', linestyle='-', linewidth=2)

    # Dashed guides through the point of maximum separation.
    plt.axvline(ks_pop, color='gray', linestyle='--')
    plt.axhline(ks_value, color='green', linestyle='--')
    plt.axhline(table.loc[peak, 'cumsum_good'], color='blue', linestyle='--')
    plt.axhline(table.loc[peak, 'cumsum_bad'], color='red', linestyle='--')
    plt.title('ks=%s ' % np.round(ks_value, 4)
              + 'at pop=%s' % np.round(ks_pop, 4), fontsize=15)

    return table
####################### over ##########################
The resulting plot looks like this:
Python calculates KS values and plots KS curves