Python implementation of rollingregression (rolling regression analysis)

Last Update:2018-08-25 Source: Internet

Author: User

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on Alibaba Cloud. Read more >

# -*- coding: utf-8 -*-
"""
Created on Sat 18 11:08:38 2018

@author: Acadsoc
"""

import copy
import os
import sys

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from dateutil import parser
from pyecharts import Bar, Line, Overlap, Page
from sklearn.preprocessing import StandardScaler
# import pymssql

# NOTE(review): project-local module; the exact casing of the module and class
# names could not be recovered from this copy of the script - confirm.
from featureselection import featureselection

plt.style.use('ggplot')  # use the ggplot plotting style

# Pick a font file able to render Chinese text; its location depends on the platform.
# `sys.platform` is 'linux' (lowercase) on Python 3, so compare with startswith().
if sys.platform.startswith('linux'):
    # NOTE(review): the leading 'path' is a placeholder for the Anaconda install
    # prefix, and the casing of the font file name could not be recovered - confirm.
    zh_font = matplotlib.font_manager.FontProperties(
        fname='path/anaconda3/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/STZHONGS.TTF')
else:
    # Raw string: '\W', '\F' and '\S' are not valid escape sequences.
    zh_font = matplotlib.font_manager.FontProperties(fname=r'C:\Windows\Fonts\STZHONGS.ttf')  # Chinese font

# Set the working directory depending on the platform.
# NOTE(review): `path` is not defined anywhere in the visible script; it is a
# placeholder that must be replaced with (or bound to) a real directory before
# this module can run.
if sys.platform.startswith('linux'):
    os.chdir(path)  # Linux path
else:
    os.chdir(path)  # Windows path

# define rolling multivariate regression analysis classes
class Rollingregression():
    """Rolling-window multivariate OLS regression over a date-indexed table.

    Typical flow: ``getData`` loads the table into ``self.df_``, ``fit`` runs
    the rolling regressions and stores ``coef_``, ``coef_pvalue_`` and ``r2_``,
    and ``coefplots`` / ``coefecharts`` visualise the results.
    """

    def __init__(self, target='new singular', date_begin='2018-01-01', date_end='2018-07-31', rolling_days=30,
                 const=False, p_value_threshold=.1, normalize=False):
        """Store the analysis settings and validate the date range.

        Parameters
        ----------
        target : str
            Name of the dependent-variable column.
        date_begin, date_end : str
            Inclusive bounds of the analysed period (parsed with dateutil).
        rolling_days : int
            Number of rows (days) in each regression window.
        const : bool
            Forwarded to ``sm.OLS`` as ``hasconst``; forced to ``False`` when
            ``normalize`` is true.
        p_value_threshold : float
            Coefficients with a p-value at or below this are highlighted in plots.
        normalize : bool
            Whether ``getData`` standardises the data before regression.

        Raises
        ------
        IOError
            If the period between ``date_begin`` and ``date_end`` is shorter
            than ``rolling_days``.
        """
        self.target = target  # regression dependent variable
        self.date_begin = date_begin  # start date
        self.date_end = date_end  # end date
        self.rolling_days = rolling_days  # rolling window length in days
        self.const = const  # whether the regression has a constant term
        self.p_value_threshold = p_value_threshold  # p-value display threshold
        self.normalize = normalize  # standardise the data before regressing
        if self.normalize:  # standardised data: the constant term is forced off
            self.const = False
        # The analysed period must span at least one rolling window.
        if (parser.parse(self.date_end) - parser.parse(self.date_begin)).days < self.rolling_days:
            raise IOError('Start date interval must be greater than or equal to scrolling days, '
                          'please re-select the start date or adjust the scrolling date.')

    # Read data
    def getData(self, file='performance-related data 2018-8-1.xlsx', variabls_in=None, variables_out=None):
        """Load the Excel sheet ``file`` and store the prepared frame in ``self.df_``.

        The first column becomes a ``DatetimeIndex``. The kept columns are the
        target column followed by every column from position 6 onward (counted
        after the date column has been removed). Missing values are replaced
        by 0 and everything is cast to float. When ``self.normalize`` is true
        the frame is standardised with ``StandardScaler``.

        ``variabls_in`` and ``variables_out`` are accepted but not used.
        """
        df = pd.read_excel(file)
        df.index = df.iloc[:, 0]  # use the date column as the index
        df = df.iloc[:, 1:]
        df = pd.concat([df[self.target], df.iloc[:, 6:]], axis=1)  # target first, then the predictors
        df = df.fillna(0)  # fill missing values
        df = df.astype(float)
        df.index = pd.DatetimeIndex(df.index)  # make the index a datetime index

        if self.normalize:  # standardise the data
            df_std = StandardScaler().fit_transform(df)
            self.df_ = pd.DataFrame(df_std, index=df.index, columns=df.columns)
        else:
            self.df_ = df

    # Rolling-date multivariate linear model
    def rollingols(self, df):
        """Fit one OLS model per rolling window and collect the results.

        ``df`` must have the dependent variable in column 0 and the regressors
        in the remaining columns. Rows are restricted to
        ``[date_begin, date_end]`` and sorted by date. The window starting at
        row ``i`` covers ``rolling_days`` rows and its results are keyed by the
        index value of the row immediately after the window.

        Returns
        -------
        tuple of DataFrame
            ``(coef, coef_pvalue, r2)``, each indexed by date in ascending
            order; ``r2`` has the columns ``'r_squred'`` and ``'R_squred_adj'``.
            All three are empty when no window fits in the selected rows.
        """
        # Select the rows inside the configured period, oldest first.
        df = df.loc[(df.index >= self.date_begin) & (df.index <= self.date_end), :]
        df = df.sort_index(ascending=True)
        coef = {}
        coef_pvalue = {}
        r2 = {}

        for i in range(df.shape[0] - self.rolling_days):
            date = df.index[i + self.rolling_days]
            data = df.iloc[i:i + self.rolling_days, :]
            X = data.iloc[:, 1:]
            y = data.iloc[:, 0]
            # NOTE(review): `hasconst` only tells statsmodels whether X already
            # contains a constant column; no constant is added here - confirm
            # that this is the intended meaning of `const`.
            model = sm.OLS(y, X, hasconst=self.const)
            lr = model.fit()

            # Keep coefficients, p-values and R2 keyed by date.
            coef[date] = lr.params
            coef_pvalue[date] = lr.pvalues
            r2[date] = [lr.rsquared, lr.rsquared_adj]

        # Convert each dictionary to a frame sorted by date ascending.
        coef = pd.DataFrame.from_dict(coef, orient='index').sort_index(ascending=True)
        coef_pvalue = pd.DataFrame.from_dict(coef_pvalue, orient='index').sort_index(ascending=True)
        # Passing `columns` here (instead of assigning afterwards) keeps this
        # working when no window was fitted and the frame is empty.
        r2 = pd.DataFrame.from_dict(r2, orient='index', columns=['r_squred', 'R_squred_adj'])
        r2 = r2.sort_index(ascending=True)
        return coef, coef_pvalue, r2

    # Date conversion helper
    def _datetransfer(self, date):
        """Parse ``date`` with dateutil and return it formatted as ``'%y-%m-%d'``.

        NOTE(review): ``%y`` yields a two-digit year; ``%Y`` may have been
        intended - confirm.
        """
        return parser.parse(date).strftime('%y-%m-%d')

    # Multivariate regression analysis and saving of the results
    def fit(self, feat_selected=None):
        """Run the rolling regression and write the result tables to Excel.

        Parameters
        ----------
        feat_selected : list-like of column labels, optional
            When given, only these columns of ``self.df_`` are used as
            regressors (the first column stays the dependent variable);
            otherwise all columns of ``self.df_`` are used.

        Returns
        -------
        self
            With ``coef_``, ``coef_pvalue_`` and ``r2_`` set. The same tables
            are written to ``coef.xlsx``, ``coef_pvalue.xlsx`` and ``r2.xlsx``
            in the current working directory.
        """
        if feat_selected is not None:
            df = pd.concat([self.df_.iloc[:, 0], self.df_[feat_selected]], axis=1)
        else:
            df = self.df_
        # Rolling regression analysis
        self.coef_, self.coef_pvalue_, self.r2_ = self.rollingols(df)
        # Store the result tables
        self.coef_.to_excel('coef.xlsx')
        self.coef_pvalue_.to_excel('coef_pvalue.xlsx')
        self.r2_.to_excel('r2.xlsx')
        return self

    # Plotting
    def coefplots(self, width_subplot=12, height_subplot=5, columns_subplots=3):
        """Plot R2 and every rolling coefficient in a grid of matplotlib subplots.

        The first subplot shows both R2 series; each following subplot shows
        one coefficient over time, with a translucent vertical line at every
        date whose p-value is at or below ``self.p_value_threshold``.

        Parameters
        ----------
        width_subplot, height_subplot : number
            Size of one subplot; the figure size is derived from the grid.
        columns_subplots : int
            Number of subplot columns.

        Returns
        -------
        self
        """
        num_subplots = self.coef_.shape[1] + 1  # one per coefficient plus the R2 plot
        # Number of rows needed: ceiling division.
        rows_subplots = -(-num_subplots // columns_subplots)
        # Canvas width and height
        width_figure = columns_subplots * width_subplot
        height_figure = rows_subplots * height_subplot

        # Rolling-regression R2 plot
        plt.figure(figsize=(width_figure, height_figure))
        plt.subplot(rows_subplots, columns_subplots, 1)
        plt.plot(self.r2_['r_squred'], color='r', lw=3, label='r_squred')
        plt.plot(self.r2_['R_squred_adj'], color='g', lw=3, label='R_squred_adj')
        plt.title('R2')
        plt.legend()

        # Coefficient and p-value plots
        for i, feature in enumerate(self.coef_.columns):
            plt.subplot(rows_subplots, columns_subplots, i + 2)
            plt.plot(self.coef_[feature], color='red', lw=3, label='Beta')

            # The line extent is the same for every marker of this feature.
            coef_min = np.min(self.coef_[feature])
            coef_max = np.max(self.coef_[feature])
            for t, pvalue in zip(self.coef_pvalue_.index, self.coef_pvalue_[feature]):
                if pvalue <= self.p_value_threshold:
                    plt.vlines(t, ymin=coef_min, ymax=coef_max,
                               color='green', alpha=.3, lw=5, label='P_value')

            # Label the y axis only in the left-most column. This subplot sits
            # at 1-based position i + 2, so it is left-most when (i + 1) is a
            # multiple of the column count. The earlier bitwise test made the
            # label depend on the parity of the remainder.
            if (i + 1) % columns_subplots == 0:
                plt.ylabel('Coef')
            plt.title(feature, fontproperties=zh_font)
        plt.show()
        return self

    # Draw with ECharts. Note: there is no vline method, so every significant
    # date becomes its own bar series; the resulting HTML file is large and
    # slow to open in a browser.
    def coefecharts(self):
        """Render the R2 and coefficient charts to an HTML page with pyecharts.

        The page is stored in ``self.page_`` and rendered with pyecharts'
        default output location.

        Returns
        -------
        self
        """
        self.page_ = Page(self.target + ' regression analysis')
        charts = []
        zeros = np.zeros(self.coef_.shape[0])

        line = Line('R2')  # R2 chart
        line.add('r_squred', self.r2_.index, self.r2_['r_squred'], is_more_utils=True)
        line.add('R_squred_adj', self.r2_.index, self.r2_['R_squred_adj'], is_more_utils=True)
        charts.append(line)

        for feature in self.coef_.columns:
            min_num = np.min(self.coef_[feature])
            max_num = np.max(self.coef_[feature])
            line = Line(feature)
            bar = Bar()
            ol = Overlap()
            line.add('Coef', self.coef_.index, self.coef_[feature], is_more_utils=True)  # coefficient chart
            for t, pvalue in zip(self.coef_pvalue_.index, self.coef_pvalue_[feature]):  # p-value markers
                if pvalue <= self.p_value_threshold:
                    # Bars that are zero everywhere except at date t.
                    min_array, max_array = zeros.copy(), zeros.copy()
                    min_array[self.coef_.index == t] = min_num
                    max_array[self.coef_.index == t] = max_num
                    bar.add('P-value', self.coef_.index, min_array)
                    bar.add('P-value', self.coef_.index, max_array)

            ol.add(line)
            ol.add(bar)
            charts.append(ol)

        self.page_.add(charts)
        self.page_.render()  # saved as HTML
        return self

# ============================================================================================================
# How to use
# NOTE(review): the attribute and method names on `fs` belong to the
# project-local `featureselection` module, which is not visible here; their
# exact casing could not be verified - confirm against that module.
rr = Rollingregression(target='continued singular')
rr.getData(file='d:/matlab/achivement2018-8-1.xlsx')

# Feature selection with elastic net
fs = featureselection()
fs.elasticnetfeatureselectplot(df=rr.df_, l1_ratio=.08,
                               plot_width=16, plot_height=8, xlim_exp=[-2, 2], ylim=[-.1, .1])
fs.elasticnetrandomsearch(df=rr.df_)
fs.elasticnet_rs_best
fs.elasticnet(rr.df_, alpha=.7, normalize=True)
fs.elasticnet_coef_
fs.elasticnet_r2_
fs.eln.coef_
fs.featurebarhplot(fs.elasticnet_coef_)
fs.elasticnet_coef_selected_

# Feature selection with random forest, then rolling regression on the selected features
fs.randomforestrandomsearch(rr.df_)
fs.rf_rs_best
fs.randomforest(rr.df_, n_estimators=139, max_features=6, impo_cum_threshold=.8)
fs.featurebarhplot(fs.rf_feat_impo_)
fs.rf_feat_selected_
rr.fit(fs.rf_feat_selected_)
rr.coefplots(columns_subplots=2)

# Stepwise feature selection, then rolling regression on the selected features
fs.stepwise(rr.df_, response='continued singular', criterion='AIC', intercept=True, val_enter=0.0,
            p_value_enter=.05, direction='both', show_step=True)

rr.fit(fs.stepwise_feat_selected)
rr.coefplots(columns_subplots=2)

This article is an English version of an article originally written in Chinese on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership, or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More

Python implementation of rollingregression (rolling regression analysis)

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support

Python implementation of rollingregression (rolling regression analysis)

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

Trending Topic

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support