Big data statistics script: order statistics broken down by city

Source: Internet
Author: User

# Per-city coupon conversion report (SparkR + Hive).
#
# For the day given by `date_parameter`, the script:
#   1. loads the users who received a coupon on that day,
#   2. loads the orders created from that day up to 7 days later,
#   3. counts coupon receivers per city and receivers-with-orders per city,
#   4. joins both counts, computes their ratio and shows the cities ordered
#      by that ratio, highest first.
#
# Requires an already-initialised SparkContext named `sc`.

# ---- Parameters ----
date_parameter <- "2016-08-01"
start_date <- as.Date(date_parameter)
daycount_parameter <- 1

# strsplit() returns a list; take its first element before picking the parts.
date_parts <- strsplit(as.character(start_date), "-")[[1]]
year <- date_parts[1]
month <- date_parts[2]

# Expression used to derive the "time" key from share_id.
# NOTE(review): the offset argument was unreadable in the source this script
# was recovered from ("substr(share_id, one)"); 1 is a placeholder reading --
# confirm the intended offset/length before relying on the result.
share_time_expr <- "substr(share_id, 1)"

# Replace the startdate / startyear / startmonth placeholders in a query.
fill_query <- function(query, day, year, month) {
  query <- gsub("startdate", day, query, fixed = TRUE)
  query <- gsub("startyear", year, query, fixed = TRUE)
  gsub("startmonth", month, query, fixed = TRUE)
}

hive_context <- sparkRHive.init(sc)
sql(hive_context, "use honeycomb_bh_db")

if (daycount_parameter > 0) {
  # Normalise through Date so only a well-formed date reaches the SQL text.
  date_parameter <- as.character(start_date)

  # ---- Stage 1: data preparation ----
  # Build the Hive queries; the placeholders are filled in by fill_query().
  acquired_users_sql <- paste(
    "select presentee_mobile, recommend_mobile, shareid",
    "from sc_t_acquire_record",
    "where sc_t_acquire_record.year = startyear",
    "and sc_t_acquire_record.month = startmonth",
    "and to_date(ct_time) = 'startdate'",
    "and shareid is not NULL"
  )

  # NOTE: the year/month filter is taken from the start date only, so a
  # 7-day window that runs into the next month loses the later orders.
  all_order_sql <- paste(
    "select distinct passenger_phone, (create_time) order_time, city_id",
    "from sc_t_order_all_info as a",
    "where a.year = startyear and a.month = startmonth",
    "and to_date(a.create_time) >= 'startdate'",
    "and to_date(a.create_time) <= date_add(date('startdate'), 7)",
    "and product_id = 210"
  )

  # For every receiving phone keep the event rows whose derived time equals
  # the minimum derived time of that phone on the start date.
  share_id_city_id_sql <- paste(
    "select b.* from (",
    "select distinct (receivephone),",
    paste0("min(", share_time_expr, ") time"),
    "from sc_analyze_acquire_event as a",
    "where a.year = startyear and a.month = startmonth",
    "and to_date(create_time) = 'startdate'",
    "and (event_name like '%2103%' or event_name like '%2102%')",
    "group by receivephone",
    ") as a inner join (",
    paste0("select *, ", share_time_expr, " time2"),
    "from sc_analyze_acquire_event",
    ") as b on a.receivephone = b.receivephone and a.time = b.time2"
  )

  share_id_city_id_sql <- fill_query(
    share_id_city_id_sql, date_parameter, year, month
  )
  acquired_users_sql <- fill_query(
    acquired_users_sql, date_parameter, year, month
  )
  all_order_sql <- fill_query(all_order_sql, date_parameter, year, month)

  share_id_city_id <- sql(hive_context, share_id_city_id_sql)

  # Users who received and bound a coupon on the start date.
  acquired_users <- sql(hive_context, acquired_users_sql)
  acquired_users <- distinct(acquired_users)
  cache(acquired_users)

  # All orders within the 7-day window (not referenced later in this script).
  all_orders <- sql(hive_context, all_order_sql)

  # Coupon receivers joined to their orders by phone number.
  acquired_users_with_orders_sql <- paste(
    "select * from (", acquired_users_sql, ") as acquire",
    "inner join (", all_order_sql, ") as orders",
    "on acquire.presentee_mobile = orders.passenger_phone",
    "where orders.passenger_phone is not null",
    "and acquire.presentee_mobile is not NULL",
    sep = " "
  )
  acquired_users_with_orders <- sql(
    hive_context, acquired_users_with_orders_sql
  )
  cache(acquired_users_with_orders)

  cache(share_id_city_id)

  # ---- Coupon receivers per city ----
  acquired_users <- join(
    acquired_users,
    share_id_city_id,
    share_id_city_id$receivephone == acquired_users$presentee_mobile,
    "inner"
  )
  # Assigning NULL drops a column; city_id is renamed to acquire_city_id.
  acquired_users$receivephone <- NULL
  acquired_users$acquire_city_id <- acquired_users$city_id
  acquired_users$city_id <- NULL
  acquired_users$recommendphone <- NULL

  group1 <- count(group_by(acquired_users, "acquire_city_id"))

  # ---- Receivers with an order, per city ----
  acquired_users_with_orders <- distinct(acquired_users_with_orders)

  # Only orders placed in the same city where the coupon was received are
  # counted: the order's city_id is kept as `forjoin` and must match the
  # city_id of the share event in the join below.
  acquired_users_with_orders$passenger_phone <- NULL
  acquired_users_with_orders$forjoin <- acquired_users_with_orders$city_id
  acquired_users_with_orders$city_id <- NULL
  acquired_users_with_orders <- join(
    acquired_users_with_orders,
    share_id_city_id,
    acquired_users_with_orders$shareid == share_id_city_id$share_id &
      acquired_users_with_orders$presentee_mobile ==
        share_id_city_id$receivephone &
      share_id_city_id$city_id == acquired_users_with_orders$forjoin &
      share_id_city_id$recommendphone ==
        acquired_users_with_orders$recommend_mobile,
    "inner"
  )
  acquired_users_with_orders$forjoin <- NULL

  group2 <- count(group_by(acquired_users_with_orders, "city_id"))
  # Rename count/city_id so they do not clash with group1's columns.
  group2$order_count <- group2$count
  group2$count <- NULL
  group2$order_city_id <- group2$city_id
  group2$city_id <- NULL

  # ---- Ratio per city ----
  group3 <- join(
    group1, group2, group1$acquire_city_id == group2$order_city_id, "inner"
  )
  group3$ratio <- group3$order_count / group3$count
  cache(group3)

  # Attach the city table and order by ratio, highest first.
  cityname <- sql(hive_context, "select * from sc_mis_city")
  city_conv_rank <- join(
    group3, cityname, group3$order_city_id == cityname$id, "inner"
  )
  city_conv_rank$id <- NULL
  city_conv_rank <- orderBy(city_conv_rank, -city_conv_rank$ratio)
  city_conv_rank$pid <- NULL

  # Rename the result columns for presentation.
  city_conv_rank$coupon_count <- city_conv_rank$count
  city_conv_rank$count <- NULL
  city_conv_rank$first_order_count <- city_conv_rank$order_count
  city_conv_rank$order_count <- NULL
  city_conv_rank$convert_ratio <- city_conv_rank$ratio
  city_conv_rank$ratio <- NULL

  # Shown inside the guard: city_conv_rank only exists when the block ran.
  showDF(city_conv_rank, 1500)
}

Big Data statistics script, sub-city order statistics

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.