# Feature preparation for the tube-pricing data set: load the CSV files,
# derive calendar features from the quote date, join the per-tube attributes,
# and convert the resulting frames to NumPy arrays.
import numpy as np
import pandas as pd
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout


def _add_date_features(frame):
    """Add year/month/dayofyear/dayofweek/day columns derived from quote_date.

    The columns are appended in place, in that fixed order, so the train and
    test frames keep identical column layouts once converted to arrays.
    """
    dates = frame["quote_date"].dt
    frame["year"] = dates.year
    frame["month"] = dates.month
    frame["dayofyear"] = dates.dayofyear
    frame["dayofweek"] = dates.dayofweek
    frame["day"] = dates.day


# Load training and test datasets. The quote_date column sits at a different
# position in each file, hence the different parse_dates indices.
train = pd.read_csv("../input/train_set.csv", parse_dates=[2])
test = pd.read_csv("../input/test_set.csv", parse_dates=[3])
tubes = pd.read_csv("../input/tube.csv")

# Create some new features.
_add_date_features(train)
_add_date_features(test)

# Attach the per-tube attributes to every quote.
train = pd.merge(train, tubes, on="tube_assembly_id", how="inner")
test = pd.merge(test, tubes, on="tube_assembly_id", how="inner")

# Fill missing materials with a placeholder id. Assign the result back instead
# of calling fillna(inplace=True) on the selected column: the chained in-place
# form does not reliably update the parent frame under pandas copy-on-write.
train["material_id"] = train["material_id"].fillna("SP-9999")
test["material_id"] = test["material_id"].fillna("SP-9999")

# Drop useless columns and create labels.
idx = test.id.values.astype(int)
test = test.drop(["id", "tube_assembly_id", "quote_date"], axis=1)
labels = train.cost.values
train = train.drop(["quote_date", "cost", "tube_assembly_id"], axis=1)

# Convert data to NumPy arrays.
train = np.array(train)
test = np.array(test)
Source: Kaggle
Copyright notice: This is an original article by the blog author and may not be reproduced without the author's permission.
Machine learning in code (Python): merging features by key, dropping useless features, and converting to NumPy arrays