大資料統計指令碼, 分城市訂單統計

來源:互聯網
上載者:User

標籤:

 

date_parameter <- "2016-08-01"
start_date<-as.Date(date_parameter)
dayCount_parameter = 1
array = strsplit(as.character(start_date),"-")
year = as.character(array[1])
month=as.character(array[2])


hiveContext <- sparkRHive.init(sc)
sql(hiveContext, "use honeycomb_bh_db")

if(dayCount_parameter>0){
date_parameter <-as.character(start_date)
#通過hiveSql 獲得想要的並集集合并且緩衝下來 sql date_add
##程式執行階段1: 資料準備。。。。。
acquired_users_sql <-"select presentee_mobile,recommend_mobile, shareid from sc_t_acquire_record where sc_t_acquire_record.year=2016 and sc_t_acquire_record.month=08 and to_date(ct_time)=‘STARTDATE‘ and shareid is not null"
all_order_sql <- "select distinct passenger_phone, (create_time) order_time,city_id from sc_t_order_all_info As a where a.year=2016 and a.month=08 and to_date(a.create_time)>=‘STARTDATE‘ and to_date(a.create_time)<=date_add(date(‘STARTDATE‘),7) and product_id=210"

# user_agent, app
share_id_city_id_sql <-"select b.* from (select distinct(receivePhone), min(substr(share_id, 11)) time from sc_analyze_acquire_event As a where a.year=2016 and a.month=08 and to_date(create_time)=‘STARTDATE‘ and (event_name like ‘%2103%‘ or event_name like ‘%2102%‘) group by receivePhone) As a inner join (select *, substr(share_id, 11) time2 from sc_analyze_acquire_event) as b on a.receivePhone=b.receivePhone and a.time=b.time2"
share_id_city_id_sql <- gsub(pattern=‘STARTDATE‘, replacement=date_parameter, share_id_city_id_sql)

acquired_users_sql<-gsub(pattern=‘STARTDATE‘, replacement=date_parameter, acquired_users_sql)
all_order_sql<-gsub(pattern=‘STARTDATE‘, replacement=date_parameter, all_order_sql)

share_id_city_id <-sql(hiveContext,share_id_city_id_sql);

#當天領券綁定的使用者集合
acquired_users <-sql(hiveContext,acquired_users_sql)
acquired_users<-distinct(acquired_users)
cache(acquired_users)


#7日內的全訂單集合
all_orders <-sql(hiveContext,all_order_sql)

acquired_users_with_orders_sql = paste("select * from (", acquired_users_sql,") As acquire inner join (",all_order_sql, ") As orders on acquire.presentee_mobile = orders.passenger_phone where orders.passenger_phone is not null and acquire.presentee_mobile is not null",sep="")
acquired_users_with_orders <-sql(hiveContext,acquired_users_with_orders_sql)
cache(acquired_users_with_orders)

cache(share_id_city_id)
acquired_users <- join(acquired_users,share_id_city_id, share_id_city_id$receivePhone==acquired_users$presentee_mobile, "inner")
acquired_users$receivePhone=NULL
#acquired_users<-filter(acquired_users, "shareid is not null")
acquired_users$acquire_city_id = acquired_users$city_id
acquired_users$city_id =NULL
acquired_users$recommendPhone=NULL
acquired_users$receivePhone=NULL


group1 <- count(group_by(acquired_users, "acquire_city_id"))
acquired_users_with_orders = distinct(acquired_users_with_orders)
#######去掉, 不在一個同一個城市領券打車的人數。 。。。對異地領券,異地打車, 在分城市統計的時候忽略不計。
acquired_users_with_orders$passenger_phone=NULL
acquired_users_with_orders$forJoin = acquired_users_with_orders$city_id
acquired_users_with_orders$city_id = NULL
acquired_users_with_orders<-join(acquired_users_with_orders,share_id_city_id, acquired_users_with_orders$shareid==share_id_city_id$share_id & acquired_users_with_orders$presentee_mobile == share_id_city_id$receivePhone & share_id_city_id$city_id==acquired_users_with_orders$forJoin & share_id_city_id$recommendPhone==acquired_users_with_orders$recommend_mobile, "inner")
acquired_users_with_orders$forJoin=NULL

group2 <-count(group_by(acquired_users_with_orders, "city_id"))
group2$order_count= group2$count
group2$count=NULL
group2$order_city_id = group2$city_id
group2$city_id=NULL
group3 <- join(group1,group2, group1$acquire_city_id==group2$order_city_id, "inner")
group3$ratio <- group3$order_count/group3$count
cache(group3)
cityName <- sql(hiveContext, "select * from sc_mis_city")
city_conv_rank = join(group3, cityName, group3$order_city_id==cityName$id, "inner")
city_conv_rank$id=NULL
city_conv_rank<-orderBy(city_conv_rank, -city_conv_rank$ratio)
city_conv_rank$pid=NULL
city_conv_rank$coupon_count=city_conv_rank$count
city_conv_rank$count=NULL
city_conv_rank$first_order_count=city_conv_rank$order_count
city_conv_rank$order_count=NULL
city_conv_rank$convert_ratio = city_conv_rank$ratio
city_conv_rank$ratio = NULL
}
showDF(city_conv_rank, 1500)

大資料統計指令碼, 分城市訂單統計

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.