Python implementation of rollingregression (rolling regression analysis)

Source: Internet
Author: User

# -*- coding: utf-8 -*-
"""
Created on Sat 18 11:08:38 2018

@author: Acadsoc
"""

import copy
import os
import sys

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from dateutil import parser
from pyecharts import Bar, Line, Overlap, Page
from sklearn.preprocessing import StandardScaler
# import pymssql

# NOTE(review): the original casing of this project-local module and class
# could not be recovered from the source; adjust to the real names.
from featureselection import featureselection

plt.style.use('ggplot')  # use the ggplot drawing style

# Pick the Chinese font file and the working directory for the current platform.
# NOTE(review): the Linux font path prefix and `path` are placeholders in the
# published source -- `path` is not defined anywhere in the visible file and
# must be set to a real directory before this module is run.
if sys.platform.startswith('linux'):
    zh_font = matplotlib.font_manager.FontProperties(
        fname='path/anaconda3/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/STZHONGS.TTF')
    os.chdir(path)  # Linux path
else:
    # Raw string: the backslashes in a Windows path must not be read as escapes.
    zh_font = matplotlib.font_manager.FontProperties(fname=r'C:\Windows\Fonts\STZHONGS.ttf')  # Chinese font
    os.chdir(path)  # Windows path

# define rolling multivariate regression analysis classes
class Rollingregression():
    """Rolling-window multivariate OLS regression with plotting helpers.

    Typical flow: construct, ``getData`` to load ``self.df_``, ``fit`` to
    compute ``self.coef_``, ``self.coef_pvalue_`` and ``self.r2_``, then
    ``coefplots`` or ``coefecharts`` to visualise them.
    """

    def __init__(self, target='new singular', date_begin='2018-01-01', date_end='2018-07-31', rolling_days=30,
                 const=False, p_value_threshold=.1, normalize=False):
        """Store the analysis settings and validate the date range.

        Parameters
        ----------
        target : name of the dependent-variable column.
        date_begin, date_end : date strings bounding the rows used for fitting.
        rolling_days : number of rows in each regression window.
        const : whether the regression includes a constant term.
        p_value_threshold : p-values at or below this are highlighted in plots.
        normalize : whether ``getData`` standardizes the data; forces
            ``const`` to False.

        Raises
        ------
        IOError
            If the span between ``date_begin`` and ``date_end`` is shorter
            than ``rolling_days`` days.
        """
        self.target = target  # regression dependent variable
        self.date_begin = date_begin  # start date
        self.date_end = date_end  # end date
        self.rolling_days = rolling_days  # rolling window length
        self.const = const  # whether the regression has a constant term
        self.p_value_threshold = p_value_threshold  # p-value display threshold
        self.normalize = normalize  # whether to standardize the data before regressing
        if self.normalize:  # standardized data: the constant term is forced off
            self.const = False
        # The date span must be at least as long as the rolling window.
        if (parser.parse(self.date_end) - parser.parse(self.date_begin)).days < self.rolling_days:
            raise IOError('Start date interval must be greater than or equal to scrolling days, '
                          'please re-select the start date or adjust the scrolling date.')

    # Read data
    def getData(self, file='performance-related data 2018-8-1.xlsx', variabls_in=None, variables_out=None):
        """Load the Excel sheet into ``self.df_`` and return ``self``.

        The first column becomes a ``DatetimeIndex``. Of the remaining
        columns, the target column plus everything from position 6 onward is
        kept, missing values are replaced by 0 and the frame is cast to float.
        When ``self.normalize`` is set the frame is standardized with
        ``StandardScaler``. ``variabls_in`` and ``variables_out`` are accepted
        but not used.
        """
        df = pd.read_excel(file)  # read data
        df.index = df.iloc[:, 0]  # first column (dates) becomes the index
        df = df.iloc[:, 1:]
        df = pd.concat([df[self.target], df.iloc[:, 6:]], axis=1)  # keep the useful columns
        df = df.fillna(0).astype(float)  # fill missing values, then cast to float
        df.index = pd.DatetimeIndex(df.index)  # convert the index to datetime

        if self.normalize:  # data standardization
            df_std = StandardScaler().fit_transform(df)
            self.df_ = pd.DataFrame(df_std, index=df.index, columns=df.columns)
        else:
            self.df_ = df
        return self

    # Rolling-date multivariate linear model
    def rollingols(self, df):
        """Fit one OLS model per rolling window of ``df``.

        ``df`` must have a datetime index, the dependent variable in column 0
        and the regressors in the remaining columns. Rows outside
        ``[date_begin, date_end]`` are dropped. Each fit uses ``rolling_days``
        consecutive rows and is keyed by the index value of the row that
        immediately follows that window.

        Returns
        -------
        (coef, coef_pvalue, r2) : three DataFrames indexed by date, holding
        the coefficients, their p-values, and the R2 / adjusted R2 values.

        Raises
        ------
        ValueError
            If the filtered data has no more rows than ``rolling_days``, so
            that no window can be fitted.
        """
        # Select rows inside the configured date range, in ascending date order.
        in_range = (df.index >= self.date_begin) & (df.index <= self.date_end)
        df = df.loc[in_range, :].sort_index(ascending=True)

        n_windows = df.shape[0] - self.rolling_days
        if n_windows <= 0:
            # Fail early with a clear message instead of a column-length
            # mismatch on the empty result frames further down.
            raise ValueError('Not enough rows between date_begin and date_end '
                             'to fit a single rolling window.')

        coef = {}
        coef_pvalue = {}
        r2 = {}
        for i in range(n_windows):
            date = df.index[i + self.rolling_days]
            data = df.iloc[i:i + self.rolling_days, :]
            X = data.iloc[:, 1:]
            y = data.iloc[:, 0]
            if self.const:
                # `hasconst` only tells statsmodels that a constant is already
                # present; the constant column itself has to be added here.
                X = sm.add_constant(X, has_constant='add')
            # Linear regression model fitting
            lr = sm.OLS(y, X, hasconst=self.const).fit()

            # Save coefficients, p-values and R2 keyed by date.
            coef[date] = lr.params
            coef_pvalue[date] = lr.pvalues
            r2[date] = [lr.rsquared, lr.rsquared_adj]

        # Convert each dictionary to a data frame sorted by ascending date.
        coef = pd.DataFrame.from_dict(coef, orient='index').sort_index(ascending=True)
        coef_pvalue = pd.DataFrame.from_dict(coef_pvalue, orient='index').sort_index(ascending=True)
        r2 = pd.DataFrame.from_dict(r2, orient='index')
        r2.columns = ['r_squred', 'r_squred_adj']
        r2 = r2.sort_index(ascending=True)
        return coef, coef_pvalue, r2

    # Date conversion helper
    def _datetransfer(self, date):
        """Parse ``date`` with dateutil and return it formatted as YYYY-MM-DD."""
        return parser.parse(date).strftime('%Y-%m-%d')

    # Multivariate regression analysis and saving data
    def fit(self, feat_selected=None):
        """Run the rolling regression on ``self.df_`` and save the results.

        When ``feat_selected`` is given, only the first column of ``self.df_``
        plus those columns are used. The three result frames are stored on
        the instance and written to ``coef.xlsx``, ``coef_pvalue.xlsx`` and
        ``r2.xlsx`` in the current working directory. Returns ``self``.
        """
        if feat_selected is not None:
            df = pd.concat([self.df_.iloc[:, 0], self.df_[feat_selected]], axis=1)
        else:
            df = self.df_
        # Rolling regression analysis
        self.coef_, self.coef_pvalue_, self.r2_ = self.rollingols(df)
        # Store the result tables
        self.coef_.to_excel('coef.xlsx')
        self.coef_pvalue_.to_excel('coef_pvalue.xlsx')
        self.r2_.to_excel('r2.xlsx')
        return self

    # Drawing
    def coefplots(self, width_subplot=12, height_subplot=5, columns_subplots=3):
        """Plot R2 and every coefficient series in a grid of subplots.

        The first subplot shows R2 and adjusted R2; each following subplot
        shows one coefficient over time, with a translucent green vertical
        line at every date whose p-value is at or below
        ``self.p_value_threshold``. Requires ``fit`` to have been called.
        Returns ``self``.
        """
        num_subplots = self.coef_.shape[1] + 1  # one per coefficient, plus the R2 plot
        # Ceiling division gives the number of rows in the grid.
        rows_subplots = (num_subplots + columns_subplots - 1) // columns_subplots
        # Canvas width and height
        width_figure = columns_subplots * width_subplot
        height_figure = rows_subplots * height_subplot

        # Rolling regression R2 plot
        plt.figure(figsize=(width_figure, height_figure))
        plt.subplot(rows_subplots, columns_subplots, 1)
        plt.plot(self.r2_['r_squred'], color='r', lw=3, label='r_squred')
        plt.plot(self.r2_['r_squred_adj'], color='g', lw=3, label='r_squred_adj')
        plt.title('R2')
        plt.legend()

        # Coefficient and p-value plots, one subplot per feature
        for i, feature in enumerate(self.coef_.columns):
            plt.subplot(rows_subplots, columns_subplots, i + 2)
            series = self.coef_[feature]
            plt.plot(series, color='red', lw=3, label='beta')

            # The marker height is the same for every date, so compute it once.
            y_min = np.min(series)
            y_max = np.max(series)
            for t, pvalue in zip(self.coef_pvalue_.index, self.coef_pvalue_[feature]):
                if pvalue <= self.p_value_threshold:
                    plt.vlines(t, ymin=y_min, ymax=y_max,
                               color='green', alpha=.3, lw=5, label='p_value')

            # Label the y axis on the first coefficient plot and on every plot
            # in the first grid column (this plot sits at grid position i + 2).
            if i == 0 or (i + 1) % columns_subplots == 0:
                plt.ylabel('coef')
            plt.title(feature, fontproperties=zh_font)
        # plt.savefig('rollingregression.jpeg')  # save picture
        plt.show()
        return self

    # Draw with ECharts. Note: there is no vline method, so the rendered file
    # is very large and slow to open in a browser.
    def coefecharts(self):
        """Render R2 and the coefficient series as a pyecharts page.

        Each coefficient chart overlays a line of the coefficient with bar
        series marking the dates whose p-value is at or below
        ``self.p_value_threshold``. The page is stored in ``self.page_`` and
        written out by ``render()``. Requires ``fit`` to have been called.
        Returns ``self``.
        """
        self.page_ = Page(self.target + ' regression analysis')
        charts = []
        zeros = np.zeros(self.coef_.shape[0])

        line = Line('R2')  # R2 chart
        line.add('r_squred', self.r2_.index, self.r2_['r_squred'], is_more_utils=True)
        line.add('r_squred_adj', self.r2_.index, self.r2_['r_squred_adj'], is_more_utils=True)
        charts.append(line)

        for feature in self.coef_.columns:
            min_num = np.min(self.coef_[feature])
            max_num = np.max(self.coef_[feature])
            line = Line(feature)
            bar = Bar()
            ol = Overlap()
            line.add('coef', self.coef_.index, self.coef_[feature], is_more_utils=True)  # coefficient graph
            for t, pvalue in zip(self.coef_pvalue_.index, self.coef_pvalue_[feature]):  # p-value graph
                if pvalue <= self.p_value_threshold:
                    # Two bar series that are zero everywhere except at date t.
                    min_array, max_array = zeros.copy(), zeros.copy()
                    min_array[self.coef_.index == t] = min_num
                    max_array[self.coef_.index == t] = max_num
                    bar.add('p-value', self.coef_.index, min_array)
                    bar.add('p-value', self.coef_.index, max_array)

            ol.add(line)
            ol.add(bar)
            charts.append(ol)

        self.page_.add(charts)
        self.page_.render()  # saved as HTML in the current working directory
        return self


# ============================================================================================================
# How to use
# NOTE(review): the method and attribute names on `fs` come from the
# project-local featureselection module, which is not visible here; their
# casing is reproduced as published and should be checked against that module.
# Bare attribute expressions below are for inspection in an interactive console.
rr = Rollingregression(target='continued singular')
rr.getData(file='d:/matlab/achivement2018-8-1.xlsx')

fs = featureselection()
fs.elasticnetfeatureselectplot(df=rr.df_, l1_ratio=.08,
                               plot_width=16, plot_height=8, xlim_exp=[-2, 2], ylim=[-.1, .1])
fs.elasticnetrandomsearch(df=rr.df_)
fs.elasticnet_rs_best
fs.elasticnet(rr.df_, alpha=.7, normalize=True)
fs.elasticnet_coef_
fs.elasticnet_r2_
fs.eln.coef_
fs.featurebarhplot(fs.elasticnet_coef_)
fs.elasticnet_coef_selected_

fs.randomforestrandomsearch(rr.df_)
fs.rf_rs_best
fs.randomforest(rr.df_, n_estimators=139, max_features=6, impo_cum_threshold=.8)
fs.featurebarhplot(fs.rf_feat_impo_)
fs.rf_feat_selected_
rr.fit(fs.rf_feat_selected_)
rr.coefplots(columns_subplots=2)

fs.stepwise(rr.df_, response='continued singular', criterion='AIC', intercept=True, val_enter=0.0,
            p_value_enter=.05, direction='both', show_step=True)

rr.fit(fs.stepwise_feat_selected)
rr.coefplots(columns_subplots=2)

Python implementation of rollingregression (rolling regression analysis)

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please email us, and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support; 6 Free Tickets per Quarter; Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.