# Per-city coupon-to-order conversion report (SparkR on Hive).
#
# For the day given by `date_parameter` the script:
#   1. loads the users who claimed and bound a coupon on that day,
#   2. loads the orders (product_id = 210) created from that day up to
#      7 days later,
#   3. counts coupon users per city (group1) and coupon users with an order
#      in that same city (group2),
#   4. joins both counts with the city table and shows, per city,
#      coupon_count, first_order_count and convert_ratio, ordered by ratio
#      descending.
#
# Expects an initialised SparkContext named `sc` to be in scope.

date_parameter <- "2016-08-01"
start_date <- as.Date(date_parameter)
daycount_parameter <- 1

# strsplit() returns a list holding one character vector, so the date parts
# have to be read from its first element; as.character(array[1]) would
# deparse the whole vector into a single string instead.
array <- strsplit(as.character(start_date), "-")
year <- array[[1]][1]
month <- array[[1]][2]

# Replace every placeholder named in `values` by its value inside `template`.
# Matching is literal (fixed = TRUE), not regex-based.
fill_sql_template <- function(template, values) {
  for (placeholder in names(values)) {
    template <- gsub(placeholder, values[[placeholder]], template,
                     fixed = TRUE)
  }
  template
}

hive_context <- sparkRHive.init(sc)
sql(hive_context, "use honeycomb_bh_db")

if (daycount_parameter > 0) {
  # Normalised through as.Date(), so the value spliced into the SQL below is
  # always a plain yyyy-mm-dd string.
  date_parameter <- as.character(start_date)
  sql_values <- c(
    startdate = date_parameter,
    startyear = year,
    startmonth = month
  )

  # Stage 1: data preparation ------------------------------------------------
  # Fetch the required data sets through Hive SQL and cache them.

  acquired_users_sql <- paste(
    "select presentee_mobile, recommend_mobile, shareid",
    "from sc_t_acquire_record",
    "where sc_t_acquire_record.year=startyear",
    "and sc_t_acquire_record.month=startmonth",
    "and to_date(ct_time) = 'startdate'",
    "and shareid is not null"
  )

  # NOTE(review): the year/month filter only covers the month of the start
  # date, so a 7-day window that crosses a month boundary is truncated.
  # Confirm whether that is intended.
  all_order_sql <- paste(
    "select distinct passenger_phone, (create_time) order_time, city_id",
    "from sc_t_order_all_info as a",
    "where a.year=startyear and a.month=startmonth",
    "and to_date(a.create_time) >= 'startdate'",
    "and to_date(a.create_time) <= date_add(date('startdate'), 7)",
    "and product_id=210"
  )

  # user_agent, App
  # NOTE(review): the arguments of substr(share_id, one) were garbled in the
  # source this script was recovered from; `one` is kept verbatim and must be
  # replaced by the real position/length arguments before running.
  share_id_city_id_sql <- paste(
    "select b.* from",
    "(select distinct(receivephone), min(substr(share_id, one)) time",
    "from sc_analyze_acquire_event as a",
    "where a.year=startyear and a.month=startmonth",
    "and to_date(create_time) = 'startdate'",
    "and (event_name like '%2103%' or event_name like '%2102%')",
    "group by receivephone) as a",
    "inner join",
    "(select *, substr(share_id, one) time2",
    "from sc_analyze_acquire_event) as b",
    "on a.receivephone=b.receivephone and a.time=b.time2"
  )

  share_id_city_id_sql <- fill_sql_template(share_id_city_id_sql, sql_values)
  acquired_users_sql <- fill_sql_template(acquired_users_sql, sql_values)
  all_order_sql <- fill_sql_template(all_order_sql, sql_values)

  share_id_city_id <- sql(hive_context, share_id_city_id_sql)

  # Users who claimed and bound a coupon on the day.
  acquired_users <- sql(hive_context, acquired_users_sql)
  acquired_users <- distinct(acquired_users)
  cache(acquired_users)

  # All orders within the 7-day window.
  all_orders <- sql(hive_context, all_order_sql)

  acquired_users_with_orders_sql <- paste(
    "select * from (", acquired_users_sql, ") as acquire",
    "inner join (", all_order_sql, ") as orders",
    "on acquire.presentee_mobile = orders.passenger_phone",
    "where orders.passenger_phone is not null",
    "and acquire.presentee_mobile is not null"
  )
  acquired_users_with_orders <- sql(hive_context,
                                    acquired_users_with_orders_sql)
  cache(acquired_users_with_orders)
  cache(share_id_city_id)

  # Attach the coupon event (and thereby its city) to every coupon user.
  acquired_users <- join(
    acquired_users,
    share_id_city_id,
    share_id_city_id$receivephone == acquired_users$presentee_mobile,
    "inner"
  )
  # acquired_users <- filter(acquired_users, "shareid is not null")
  acquired_users$acquire_city_id <- acquired_users$city_id
  acquired_users$city_id <- NULL
  acquired_users$recommendphone <- NULL
  acquired_users$receivephone <- NULL

  # Coupon users per city.
  group1 <- count(group_by(acquired_users, "acquire_city_id"))

  acquired_users_with_orders <- distinct(acquired_users_with_orders)

  # Drop users whose order was placed in a different city than the one the
  # coupon was claimed in: such cross-city rides are ignored in the per-city
  # statistics. The order city is kept as `forjoin` and matched against the
  # coupon event's city in the join below.
  acquired_users_with_orders$passenger_phone <- NULL
  acquired_users_with_orders$forjoin <- acquired_users_with_orders$city_id
  acquired_users_with_orders$city_id <- NULL
  acquired_users_with_orders <- join(
    acquired_users_with_orders,
    share_id_city_id,
    acquired_users_with_orders$shareid == share_id_city_id$share_id &
      acquired_users_with_orders$presentee_mobile ==
        share_id_city_id$receivephone &
      share_id_city_id$city_id == acquired_users_with_orders$forjoin &
      share_id_city_id$recommendphone ==
        acquired_users_with_orders$recommend_mobile,
    "inner"
  )
  acquired_users_with_orders$forjoin <- NULL

  # Coupon users with an order per city; columns are renamed so they do not
  # clash with group1's `count` after the join.
  group2 <- count(group_by(acquired_users_with_orders, "city_id"))
  group2$order_count <- group2$count
  group2$count <- NULL
  group2$order_city_id <- group2$city_id
  group2$city_id <- NULL

  group3 <- join(group1, group2,
                 group1$acquire_city_id == group2$order_city_id, "inner")
  group3$ratio <- group3$order_count / group3$count
  cache(group3)

  cityname <- sql(hive_context, "select * from sc_mis_city")
  city_conv_rank <- join(group3, cityname,
                         group3$order_city_id == cityname$id, "inner")
  city_conv_rank$id <- NULL
  # Highest conversion ratio first.
  city_conv_rank <- orderBy(city_conv_rank, -city_conv_rank$ratio)
  city_conv_rank$pid <- NULL
  city_conv_rank$coupon_count <- city_conv_rank$count
  city_conv_rank$count <- NULL
  city_conv_rank$first_order_count <- city_conv_rank$order_count
  city_conv_rank$order_count <- NULL
  city_conv_rank$convert_ratio <- city_conv_rank$ratio
  city_conv_rank$ratio <- NULL

  # Shown inside the guard: city_conv_rank only exists when the block ran.
  showDF(city_conv_rank, 1500)
}
# Big-data statistics script: per-city order statistics.