This note covers three main kinds of data preprocessing:
1. Interval Scaling
The example below reads the data, preprocesses it, and stores the result.
# Interval scaling: rescale every column of the CSV into the range [0, 10],
# plot rows 200-599 before and after, and save the scaled data to a new CSV.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing

# Use a font that can render Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Keep the minus sign rendering correctly when that font is active.
plt.rcParams['axes.unicode_minus'] = False

filename = 'Hits per second_t20m_130.csv'
data_f = pd.read_csv(filename)  # two-dimensional DataFrame
# print(data_f)  # uncomment to inspect the raw data

# Plot a 400-row window of the raw data for comparison.
plt.plot(data_f[200:600])
plt.title(' Pre data preprocessing ')
plt.show()

print(' ***2. Data normalization, mapping to interval [Min,max]: ')
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 10))
# fit_transform returns a NumPy array, not a DataFrame.
data_mi_ma = min_max_scaler.fit_transform(data_f)

# Plot the same window after scaling.
plt.plot(data_mi_ma[200:600])
plt.title(' Data preprocessing ')
plt.show()

print(type(data_mi_ma))
# Wrap the array in a DataFrame so that it can be written with to_csv.
data_ = pd.DataFrame(data_mi_ma)
print(type(data_))
# index=False drops the row-index column; header=False drops the column names.
data_.to_csv("Afterpre.csv", index=False, header=False)
Note: the scaler returns a NumPy array, so before saving the processed data as a CSV file you first need to convert it to a DataFrame.
2. Standardization
# Standardization: standardize every column of the CSV with
# sklearn.preprocessing.scale, plot rows 200-599 before and after, and save
# the standardized data to a new CSV.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing

# Use a font that can render Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Keep the minus sign rendering correctly when that font is active.
plt.rcParams['axes.unicode_minus'] = False

filename = 'Hits per second_t20m_130.csv'
data_f = pd.read_csv(filename)  # two-dimensional DataFrame
# print(data_f)  # uncomment to inspect the raw data

# Plot a 400-row window of the raw data for comparison.
plt.plot(data_f[200:600])
plt.title(' before data processing ')
plt.show()

# scale returns a NumPy array, not a DataFrame.
data_sta = preprocessing.scale(data_f)
# print(data_sta)  # uncomment to inspect the standardized data

# Plot the same window after standardization.
plt.plot(data_sta[200:600])
plt.title(' Data processing ')
plt.show()

print(type(data_sta))
# Wrap the standardized array (not the min-max result of the previous
# example) in a DataFrame so that it can be written with to_csv.
data_ = pd.DataFrame(data_sta)
print(type(data_))
# index=False drops the row-index column; header=False drops the column names.
data_.to_csv("Afterpre.csv", index=False, header=False)
3. Logarithmic transformation
# Logarithmic transformation: apply the natural logarithm to every value of
# the CSV, plot rows 200-599 before and after, and save the result to a new
# CSV.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Use a font that can render Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Keep the minus sign rendering correctly when that font is active.
plt.rcParams['axes.unicode_minus'] = False

filename = 'Hits per second_t20m_130.csv'
data_f = pd.read_csv(filename)  # two-dimensional DataFrame
# print(data_f)  # uncomment to inspect the raw data

# Plot a 400-row window of the raw data for comparison.
plt.plot(data_f[200:600])
plt.title(' Pre-data preprocessing ')
plt.show()

# np.log on a DataFrame returns a DataFrame, so no conversion is needed
# before saving.
# NOTE(review): np.log gives -inf for zeros and NaN for negative values;
# confirm the input is strictly positive (or consider np.log1p).
data_log = np.log(data_f)

# Plot the same window after the transformation.
plt.plot(data_log[200:600])
plt.title(' After data preprocessing ')
plt.show()

# index=False drops the row-index column; header=False drops the column names.
data_log.to_csv("Afterpre.csv", index=False, header=False)