# coding: utf-8
from datetime import datetime
from datetime import timedelta

import pandas as pd
from dateutil.parser import parse
from pandas import DataFrame, Series
# --- Load the shipment data and build a series of shipping delays ---
# NOTE: bare expressions such as ``type(rowdata)`` or ``ts[:10]`` only display
# a value in an interactive session (IPython/Jupyter); in a plain script they
# are evaluated and discarded.

# ``keep_date_col=True`` was dropped: it only has an effect when ``parse_dates``
# combines several columns, which is not requested here.
rowdata = pd.read_table('C:/users/xiaoyniu/ag_forcast/traindata/trainingdata_0811.txt')
type(rowdata)

# Keep only the two ship-date columns. ``.copy()`` makes the column
# assignments below write to an independent frame, not a slice of ``rowdata``.
trainingdata = rowdata[['pkg_tss_ship_date', 'pkg_tss_ship_date.1']].copy()
type(trainingdata)
trainingdata[:10]

# NOTE(review): ``colnames`` is not a pandas attribute, so this only stores a
# plain Python attribute and does NOT rename the columns (the code below still
# uses the original column names). Renaming would be ``trainingdata.columns = ...``.
trainingdata.colnames = ['na', 'cn']
trainingdata.colnames

# Parse both date columns into datetime64 values.
trainingdata['pkg_tss_ship_date'] = pd.to_datetime(trainingdata['pkg_tss_ship_date'])
trainingdata['pkg_tss_ship_date.1'] = pd.to_datetime(trainingdata['pkg_tss_ship_date.1'])
trainingdata[:10]

# Elapsed time between the two ship dates, one value per row.
# NOTE: this name shadows ``datetime.timedelta`` imported at the top of the file.
timedelta = trainingdata['pkg_tss_ship_date.1'] - trainingdata['pkg_tss_ship_date']
timedelta[:10]

# Work on a copy so that deleting a column does not also remove it from
# ``trainingdata`` (a plain ``train1 = trainingdata`` would alias the frame).
train1 = trainingdata.copy()
del train1['pkg_tss_ship_date']
train1['timedelta'] = timedelta
train1[:10]
type(train1['timedelta'])

# Series of delays indexed by the second ship date.
ts = Series(timedelta.values, index=trainingdata['pkg_tss_ship_date.1'])
type(ts)  # Series
ts[:10]
ts.index[:10]
ts.values[:10]
stamp = ts.index[0]
stamp

ts['2015-08-18 00:00:00']  # author's observation: mostly goods shipped 10 days earlier
ts['2015-08']  # a partial date string is enough to select data
# ts.truncate(after='2015-09-01')  # another way to slice: drop data after September 1
ts.index.is_unique  # check whether the index values are unique

grouped = ts.groupby(level=0)
grouped.count()  # number of shipments per date
# grouped.median()  # open question from the author: median/mean could not be computed here
# --- Generating date ranges and combining frequency offsets ---
index = pd.date_range('20160901', '20160930')
index  # daily time points are produced by default
pd.date_range(start='20160901', periods=10)  # 10 days forward, including 2016-09-01
pd.date_range(end='20160901', periods=10)  # 10 days backward, including 2016-09-01

# Last business day of each month.
# NOTE(review): newer pandas (2.2+) spells this alias 'BME' and warns on 'BM';
# switch once the target pandas version is confirmed.
pd.date_range(start='20160401', end='20160901', freq='BM')

# When the start (or end) stamp carries a time of day, so do the generated stamps...
pd.date_range(start='20160901 10:20:30', periods=10)
# ...unless ``normalize=True`` snaps them to midnight.
pd.date_range(start='20160901 10:20:30', periods=10, normalize=True)

from pandas.tseries.offsets import Hour, Minute
hour = Hour()
hour
four_hour = Hour(4)  # such objects rarely need to be created explicitly
four_hour  # 4 hours

# Usually a string made of a multiplier and a base frequency is passed instead.
pd.date_range('20160901', '20160902', freq='8h')
pd.date_range('20160901', '20160902', freq='8h30min')

# Most offset objects can be combined with ``+`` (class names are case-sensitive).
# NOTE(review): the Minute argument was lost in the original text; 30 mirrors
# the '8h30min' example above - confirm.
Hour(4) + Minute(30)

# WOM = week of month: the second Friday of each month.
rng = pd.date_range('20160901', '20161201', freq='WOM-2FRI')
rng
# shift() without ``freq`` leaves the index untouched and moves the values by
# positions (rows), not by calendar days; vacated positions become missing.
ts2 = ts.shift(2)  # values move two rows later
ts2[:10]
tsf2 = ts.shift(-2)  # values move two rows earlier
tsf2[:10]
# --- Shifting dates with offset objects ---
from pandas.tseries.offsets import Day, MonthEnd

datetime.now() + 3 * Day()
datetime.now() + Day(3)
datetime.now() + MonthEnd()  # next month end (the author saw the end of September)
datetime.now() + MonthEnd(3)  # third month end from now (the author saw the end of November)

offset = MonthEnd()
offset
offset.rollforward(datetime.now())  # snap forward to the month end on/after now
offset.rollback(datetime.now())  # snap back to the month end on/before now

offset2 = MonthEnd(3)
offset2
# rollforward/rollback only snap a date that is not already on the offset to
# the nearest on-offset date; the multiplier (3) is not applied, which is why
# these give the same month ends as ``offset`` above.
offset2.rollforward(datetime.now())
offset2.rollback(datetime.now())
# --- Time zones on a Series ---
import pytz  # time zone handling

pytz.common_timezones[-5:]  # a few of the common time zone names
pytz.timezone('US/Eastern')  # look up a time zone object by name
print(ts.index.tz)  # the index of ``ts`` carries no time zone information
# A time zone can also be attached when the data is created, by passing tz='UTC'.
ts_utc = ts.tz_localize('UTC')  # attach a time zone to the naive index
ts_utc.tz_convert('US/Eastern')  # only a localized series can be converted to another zone
# --- Time zones on a single Timestamp ---
# A Timestamp is pandas' scalar for one point in time; like a Series it can be
# localized and then converted to other zones.
stamp = pd.Timestamp('2011-03-12 04:00')
# NOTE: despite its name, ``stamp_utc`` is localized to US/Eastern here and
# only converted to UTC two lines below.
stamp_utc = stamp.tz_localize('US/Eastern')
stamp_utc
stamp_utc.tz_convert('UTC')

# A time zone can be passed directly when the Timestamp is created.
# NOTE: despite its name, ``stamp_moscow`` is created in UTC.
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='UTC')
stamp_moscow
# ``value`` is the integer offset from the Unix epoch measured in UTC...
stamp_moscow.value
# ...so converting to another zone changes only the displayed wall-clock time,
# not the underlying value.
stamp_moscow.tz_convert('US/Eastern').value
# --- Arithmetic between series in different time zones ---
from pandas.tseries.offsets import Hour
import numpy as np

rng = pd.date_range('2016-09-01', periods=10, freq='B')  # 10 business days
# One random value per date; ``randn`` needs the length of the range, not the range itself.
ts = Series(np.random.randn(len(rng)), index=rng)
ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')
result = ts1 + ts2  # values are aligned on the instant in time, not on wall-clock labels
result.index  # combining two different time zones yields a UTC index
# --- Periods, period ranges and frequency conversion ---
# NOTE(review): the annual aliases ('A-DEC', 'A-JUN', 'A-MAR') and the
# month-end alias 'M' passed to date_range are the spellings of the pandas
# version this was written for; newer pandas (2.2+) prefers 'Y-...' and 'ME'.
# Switch once the target pandas version is confirmed.

# A Period is a span of time: here 2007-01-01 through 2007-12-31.
p = pd.Period(2007, freq='A-DEC')
p
p + 5
p - 2  # adding/subtracting integers shifts the period by its own frequency
pd.Period(2009, freq='A-DEC') - p  # periods with the same frequency can be subtracted directly

rng = pd.period_range('2009-09-09', '2010-01-02', freq='M')
rng  # period_range builds a regular range of periods

value = ['2003', '2004', '2005']
index = pd.PeriodIndex(value, freq='Q-DEC')  # an index whose entries are periods
index

p.asfreq('M', 'start')  # annual -> monthly: first month of the year
p.asfreq('M', 'end')  # annual -> monthly: December of the year

# Fiscal year ending in June. The year literal was garbled in the original;
# '2016' is the value consistent with the results recorded below.
p1 = pd.Period('2016', freq='A-JUN')
p1.asfreq('M', 'start')  # Period('2015-07', 'M')
p1.asfreq('M', 'end')  # Period('2016-06', 'M')

p2 = pd.Period('2016-09', 'M')
p2.asfreq('A-JUN')  # September 2016 belongs to the June-ending fiscal year 2017

# NOTE(review): the end of this range was lost in the original text; '2009' is
# a reconstruction - confirm.
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
ts.asfreq('M', how='start')
ts.asfreq('M', how='end')

# The author asked what the quarter suffix does when the frequency is annual;
# the recorded results below are those of the March-ending fiscal year 2016.
p5 = pd.Period('2016Q3', freq='A-MAR')
p5.asfreq('D', 'start')  # Period('2015-04-01', 'D')
p5.asfreq('D', 'end')  # Period('2016-03-31', 'D')

# Quarterly periods for a fiscal year ending in January.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)

rng = pd.date_range('2016-01', '2016-09', freq='M')
rng.to_period()  # to_period converts a timestamp index into a period index

# NOTE(review): the year literal was lost in the original text; '2016' is a
# reconstruction - confirm.
p6 = pd.Period('2016', freq='A-DEC')
p6.to_timestamp(how='end')  # to_timestamp converts a period back into a timestamp

rng = pd.date_range('2016-01-29', periods=6, freq='D')
rng.to_period(freq='M')  # several days map to the same month, so periods repeat
# Data analysis using Python - Chapter 10: Time series (1)