# Data preprocessing methods: mainly used to handle features measured on different scales (units) and features whose values do not trend the same way.
import numpy as np
from sklearn import preprocessing
# --- Zero-mean standardization ---------------------------------------------
# Random 3x4 sample matrix (3 rows/samples, 4 columns/features), values in [0, 1).
data = np.random.rand(3, 4)
# scale() standardizes each column: subtract the column mean and divide by the
# column standard deviation (not the variance). Commonly applied before SVMs.
data_standardized = preprocessing.scale(data)

# --- Min-max scaling (linear transform) ------------------------------------
# Maps every column onto the interval (0, 1): (x - min) / (max - min).
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
# fit_transform() is required here: fit() alone returns the fitted scaler
# object, not the rescaled data.
data_scaled = data_scaler.fit_transform(data)

# --- Normalization ---------------------------------------------------------
# With norm="l1" each row is rescaled so that the sum of its absolute values
# equals 1; this limits the effect of artificially inflated feature values.
data_normalized = preprocessing.normalize(data, norm="l1")

# --- Feature binarization --------------------------------------------------
# Values greater than the 0.5 threshold become 1, all others become 0.
# Binarizer is stateless, so transform() can be called without a prior fit().
data_binarized = preprocessing.Binarizer(threshold=0.5).transform(data)

# --- Label encoding: map class labels to integers --------------------------
label_encode = preprocessing.LabelEncoder()
input_class = ["Audi", "Ford", "Audi", "BMW", "Toyota", "Benz"]
label_encode.fit(input_class)
# classes_ holds the sorted unique labels; a label's index in that array is
# the integer code the encoder assigns to it.
for i, item in enumerate(label_encode.classes_):
    print(item, '--', i)
# One-hot encoding
# Data preprocessing for machine learning in Python