Data Analysis Using Python - Chapter 10: Time Series (1)

Source: Internet
Author: User
Tags: time zones, timedelta


# coding: utf-8

from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd
from dateutil.parser import parse
from pandas import DataFrame, Series

# --- Load the shipment data and compute the per-row shipping delay ----------
# `keep_date_col` is intentionally not passed: it only has an effect together
# with `parse_dates` (not used here) and was removed in pandas 3.0.
rowdata = pd.read_table(
    'C:/users/xiaoyniu/ag_forcast/traindata/trainingdata_0811.txt'
)
type(rowdata)  # DataFrame

# Take an explicit copy so that the column assignments below modify an
# independent frame instead of a slice of `rowdata` (SettingWithCopyWarning).
trainingdata = rowdata[['pkg_tss_ship_date', 'pkg_tss_ship_date.1']].copy()
type(trainingdata)
trainingdata[:10]

# Assigning to `trainingdata.colnames` (an R habit) would only attach a plain
# attribute and leave the column labels untouched, so the labels are simply
# inspected here; the original column names are used throughout this script.
trainingdata.columns

# Parse both ship-date columns from text into datetime64 values.
trainingdata['pkg_tss_ship_date'] = pd.to_datetime(
    trainingdata['pkg_tss_ship_date']
)
trainingdata['pkg_tss_ship_date.1'] = pd.to_datetime(
    trainingdata['pkg_tss_ship_date.1']
)
trainingdata[:10]

# Row-wise difference between the two dates (a timedelta64 Series).
# NOTE: this name shadows `datetime.timedelta` imported at the top of the file.
timedelta = trainingdata['pkg_tss_ship_date.1'] - trainingdata['pkg_tss_ship_date']
timedelta[:10]

# Build `train1` as a new frame (second ship date + delay).  Using `drop`
# rather than `del` on an alias keeps `trainingdata` itself intact.
train1 = trainingdata.drop(columns='pkg_tss_ship_date')
train1['timedelta'] = timedelta
train1[:10]
type(train1['timedelta'])  # Series

# --- Time series of shipping delays, indexed by the second ship date --------
ts = Series(timedelta.values, index=trainingdata['pkg_tss_ship_date.1'])
type(ts)  # Series
ts[:10]
ts.index[:10]
ts.values[:10]

# Elements of a datetime index are pandas Timestamp objects.
stamp = ts.index[0]
stamp

# Label-based selection with date strings.
ts.loc['2015-08-18 00:00:00']  # author's note: mostly goods from ~10 days earlier
ts.loc['2015-08']  # a partial date string selects the whole month
# Another way to slice: drop everything after September 1st.
# ts.truncate(after='2015-09-01')   (requires a sorted index)

ts.index.is_unique  # check whether the timestamps are unique

# Group rows that share the same timestamp.
grouped = ts.groupby(level=0)

grouped.count()  # number of shipments per date

# The author could not get a median/mean out of `grouped` directly (older
# pandas versions did not aggregate timedelta values).  Converting the delay
# to whole days first gives plain numbers that aggregate everywhere.
grouped_days = ts.dt.days.groupby(level=0)
grouped_days.median()
grouped_days.mean()

Index=pd.date_range (' 20160901 ', ' 20160930 ')
Index #默认产生的是按天计算的时间点

Pd.date_range (start= ' 20160901 ', periods=10) #往后数10天, including 20160901
Pd.date_range (end= ' 20160901 ', periods=10) #往前数10天, including 20160901
Pd.date_range (start= ' 20160401 ', end= ' 20160901 ', freq= ' BM ') #每月的最后一个工作日
Pd.date_range (start= ' 20160901 10:20:30 ', periods=10) #当起始 (terminating) date stamp with time, the resulting date stamp also has time
Pd.date_range (start= ' 20160901 10:20:30 ', periods=10,normalize=true) #normalize选项可以避免这一问题, normalize the date to midnight Timestamp

# --- Frequencies and date offsets --------------------------------------------
from pandas.tseries.offsets import Hour, Minute

hour = Hour()
hour
four_hour = Hour(4)  # offset objects rarely need to be created explicitly
four_hour  # <4 * Hours>
# Usually a frequency string made of a multiplier and a base frequency is used.
pd.date_range('20160901', '20160902', freq='8h')
pd.date_range('20160901', '20160902', freq='8h30min')
# Most offset objects can be combined with `+`; class names are case-sensitive.
# NOTE(review): the Minute argument was lost in the source; 30 is assumed to
# mirror the '8h30min' example above.
Hour(4) + Minute(30)

# 'WOM' = week of month: the second Friday of every month.
rng = pd.date_range('20160901', '20161201', freq='WOM-2FRI')
rng

# --- Shifting data -----------------------------------------------------------
# Without a `freq` argument, shift() moves the *values* by a number of
# positions (rows) and leaves the index untouched; the vacated slots become
# missing.  Pass `freq=` to move the timestamps instead.
ts2 = ts.shift(2)  # values move two positions later
ts2[:10]
tsf2 = ts.shift(-2)  # values move two positions earlier
tsf2[:10]

# --- Shifting dates with offsets ---------------------------------------------
from pandas.tseries.offsets import Day, MonthEnd

# Two equivalent ways to add three days to the current time.
datetime.now() + 3 * Day()
datetime.now() + Day(3)

datetime.now() + MonthEnd()  # end of the current month (end of September when written)
datetime.now() + MonthEnd(3)  # third month-end from now (end of November when written)

offset = MonthEnd()
offset
# rollforward/rollback snap a date to the next/previous month-end; a date that
# already is a month-end is returned unchanged.
offset.rollforward(datetime.now())
offset.rollback(datetime.now())
offset2 = MonthEnd(3)
offset2
# The multiplier is ignored when rolling: these still move only to the nearest
# month-end (not three months).  Use `date + offset2` to step three month-ends.
offset2.rollforward(datetime.now())
offset2.rollback(datetime.now())

# --- Time zones --------------------------------------------------------------
import pytz  # time-zone handling

pytz.common_timezones[-5:]  # a few of the common time-zone names
pytz.timezone('US/Eastern')  # look up a time-zone object by name

print(ts.index.tz)  # the index of `ts` carries no time-zone information
# A zone can also be attached at creation time, e.g. date_range(..., tz='UTC').

ts_utc = ts.tz_localize('UTC')  # attach a zone to the naive timestamps
# Only a localized (tz-aware) series can be converted to another zone.
ts_utc.tz_convert('US/Eastern')

# --- Time-zone aware Timestamp objects ---------------------------------------
# A Timestamp is pandas' scalar for a single point in time (a datetime
# subclass); it can be localized and converted just like a series index.
stamp = pd.Timestamp('2011-03-12 04:00')
# NOTE: despite its name, `stamp_utc` is localized to US/Eastern here.
stamp_utc = stamp.tz_localize('US/Eastern')
stamp_utc
stamp_utc.tz_convert('UTC')

# A time zone can be passed directly when creating the Timestamp.
# NOTE: despite its name, `stamp_moscow` is created in UTC.
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='UTC')
stamp_moscow
# `.value` is the number of nanoseconds since the Unix epoch, measured in UTC.
stamp_moscow.value
# Converting the zone changes only how the instant is displayed, so the
# underlying UTC value stays the same.
stamp_moscow.tz_convert('US/Eastern').value

# --- Arithmetic between series in different time zones -----------------------
from pandas.tseries.offsets import Hour
import numpy as np

rng = pd.date_range('2016-09-01', periods=10, freq='B')  # business days
# One random value per timestamp.
ts = Series(np.random.randn(len(rng)), index=rng)

ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')
# Adding two series from different zones aligns them on the underlying instant;
# labels present in only one operand yield NaN ...
result = ts1 + ts2
result.index  # ... and the resulting index is expressed in UTC

P=PD. Period (2007,freq= ' A-dec ') #period类是一段时间的意思, which means from January 1, 2007 to December 31, 2007
P
P+5
P-2 #根据其频率对时间进行位移
Pd. Period (2009,freq= ' A-dec ')-P #频率相同时, can be directly subtracted from

Rng=pd.period_range (' 2009-09-09 ', ' 2010-01-02 ', freq= ' M ')
RNG #period_range can also create a time range for a rule

Value=[' 2003 ', ' 2004 ', ' 2005 ']
INDEX=PD. Periodindex (value,freq= ' Q-dec ') #periodindex是个什么类???
Index

P.asfreq (' M ', ' Start ') #将年度数据转换为月度的形式, converted to the month of the year
P.asfreq (' M ', ' End ') #将年度数据转换为月度的形式, converted to December of the year
P1=PD. Period (' freq= ', ' A-jun ')
P1.asfreq (' m ', ' Start ') #Period (' 2015-07 ', ' m ')
P1.asfreq (' m ', ' End ') #Period (' 2016-06 ', ' m ')

P2=PD. Period (' 2016-09 ', ' M ')
P2.asfreq (' A-jun ') #2016年9月进行频率转换, equivalent to 2017 years in the time frequency ending in June

Rng=pd.period_range (' 2006 ', ' freq= ', ' A-dec ')
Ts=series (Np.random.randn (RNG)), index=rng)
Ts.asfreq (' M ', how= ' start ')
Ts.asfreq (' M ', how= ' end ')

P5=PD. Period (' 2016q3 ', freq= ' A-mar ') #Q n What's the use here???
P5.asfreq (' d ', ' Start ') #Period (' 2015-04-01 ', ' d ')
P5.asfreq (' d ', ' End ') #Period (' 2016-03-31 ', ' d ')

Rng=pd.period_range (' 2011q3 ', ' 2012Q4 ', freq= ' Q-jan ') #使用period_range生成季度数据
Ts=series (Np.arange (RNG)), index=rng)

Rng=pd.date_range (' 2016-01 ', ' 2016-09 ', freq= ' m ')
Rng.to_period () #data_range对象才有to_period方法, the To_period method can convert a timestamp index to a time index
P6=PD. Period (' freq= ', ' A-dec ')
P6.to_timestamp (how= ' end ') #to_timestamp方法可以将时期索引转换为时间戳

Rng=pd.date_range (' 2016-01-29 ', periods=6,freq= ' d ')
Rng.to_period (freq= ' m ') #产生重叠的时间区间

Data Analysis Using Python - Chapter 10: Time Series (1)

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.