`str_extract_all()` is a function from the stringr package. It is useful for data cleaning: given a string and a pattern (a regular expression describing the rule you want), it extracts every piece of the string that matches the pattern. Its usage is illustrated below:
> x <- "abacdef12g"
> str_extract_all(x, "[f0-9]")
[[1]]
[1] "f" "1" "2"
> str_extract_all(x, "[f0-9]{1,3}")
[[1]]
[1] "f12"
> str_extract_all(x, "[f0-9]{1,2}")
[[1]]
[1] "f1" "2"
Attached below is some code I wrote in day-to-day work that uses this function:
Library (GGPLOT2) library (rmysql) library (STRINGR) library (SQLDF) library (PLYR) conn <-(MySQL (), Dbconnect = " Tracker ", username=" gaoyang922 ", password=" gaoyang922@123456! ", host=" 10.10.109.62 ", port=1333) dbsendquery (conn, ' SET NAMES UTF8 ') query<-dbsendquery (conn, select Key_table,left (insert_time,8) as Insert_date,label,sessionid, Stay_time,site,page_url from tracker.hbase_visit where insert_time isn't null and page_url like ' https://item.zhong.com% ' ") Rawdata_vi <-Fetch (QUERY,N=-1) Dbdisconnect (conn) Dim (Rawdata_vi) Head (RAWDATA_VI) Nrow (RAWDATA_VI) # Rawdata_v I$prodid=as.numeric (Unlist (Str_extract_all (Rawdata_vi$page_url, "[0-9]{1,2}")) F<-function (x) {if (Grepl ()
ProductId ", x)" {Result<-as.numeric (Unlist (Str_extract_all (x, "[0-9]{1,2}")) [1]) else{result<-9999} Result} Rawdata_vi$prodid =sapply (rawdata_vi$page_url,f) rawdata_vi_prod<-subset (rawdata_vi,prodid!=9999) head ( Rawdata_vi_prod) Dim (rawdata_vi_prod) ############## above is gettingThe product ID of each URL is the following code to get the attributes of each product conn <-Dbconnect (MySQL (), dbname = "PMS", Username= "gaoyang922", password= "Gaoyan" g922@123456! ", host=" 10.10.109.62 ", port=1333) dbsendquery (conn, ' SET NAMES UTF8 ') query<-dbsendquery (conn," select A.*,b.bizcategoryname from Pms.pms_product a LEFT join pms.pms_biz_category b on a.producttype = b.ID ") rawdata_pd <- Fetch (QUERY,N=-1) Dbdisconnect (conn) head (RAWDATA_PD) ####### ##### Daily Summary prod_sumy1= rawdata_vi_prod[,c (2,8)] Prod_ Sumy2 = count (Prod_sumy1, C ("Insert_date", "ProdID")) Prod_sumy3 = count (prod_sumy1, "ProdID") head (prod_sumy2) prod1 = Merge (Prod_sumy2,rawdata_pd[,c (1,4,5,31)],by.x = "ProdID", by.y = "ID", all.x=t) head (prod1) Daily_prod_rank = Prod1[wit H (prod1, Order (-as.numeric (insert_date),-freq)),] names (Daily_prod_rank) [C (1,2,3)] <-C ("ProdId", "Insertdate", "" Pvcnt ") head (daily_prod_rank) daily_prod_rank$insertdate = As.character (daily_prod_rank$insertdate) ### Import to Database conn t;-Dbconnect (MySQL (), dbname = "analyse", username= "gaoyang922", password= "gaoyang922@123456!", host= "10.10.109.62", port=1333) dbwritetable (conn, "Daily_ Prod_rank_raw ", prod_sumy2) ### products 1. Concerned, 2. Purchase, 3. Reservation, 4. Conn <-Dbconnect (MySQL (), dbname =" PMS ", username=" gaoyang92 " 2 ", password=" gaoyang922@123456! ", host=" 10.10.109.62 ", port=1333) query<-dbsendquery (conn," Select ProductID, Relationtype, (case when relationtype=1 then ' attention ' when relationtype=2 then ' buy ' when relationtype=4th
En ' o ' rt_desc ' Else ' appointment ' end ' as a, count (*) as pd_cnt from Pms.pms_user_relation where createtime between ' 2015-08-18 ' and ' 2015-09-05 ' GROUP by Productid,relationtype ) Rawdata_rt <-Fetch (QUERY,N=-1) Dbdisconnect (conn) head (RAWDATA_RT) Table (ProdID) prodid_ggplot<-rawdata_vi$ Prodid;prodid_ggplot<-reorder (prodid_ggplot,prodid_ggplot,length) Rawdata_vi$prodid_ggplot<-prodide_ Ggplot Ggplot (Subset (rawdata_vi,prodid!=9999), AES (X=prodid_ggplot) +geom_bar () # Library (GGPLOT2) # library (rmysql) # library (STRINGR) # library (DPLYR) # conn <-Dbconnect (MySQL () , dbname = "Tracker", Username= "zhoumeixu204", password= "zhoumeixu204@123456!", host= "10.10.109.62", port=1333) # Query <-dbsendquery (conn, "Select Key_table,left" (insert_time,8) as # Insert_date,label,sessionid,stay_tim E,site,page_url from Tracker.hbase_visit # where insert_time are NOT null ') # # # Query_1<-dbsend Query (Conn, "# SELECT * Tracker.hbase_visitor where Insert_time # is Not null and are NOT null and country= ' Rawdata_vi ' "# <-Fetch (QUERY,N=-1) # Hbase_visitor<-fetch (q
Uery_1,n=-1) # Dbdisconnect (conn) # Dim (Rawdata_vi) # Head (RAWDATA_VI) # f<-function (x) {# if (Grepl ("ProductId", X)) {
# Result<-as.numeric (Unlist (Str_extract_all (x, "[0-9]{1,2}")] [1]) # # else{# result<-9999 #} # result #} # rawdata_vi$ProdID =sapply (rawdata_vi$page_url,f) # Rawdata_vi<-subset (rawdata_vi,prodid!=9999) # table (ProdID) # # PRODID_GG Plot<-rawdata_vi$prodid;prodid_ggplot<-reorder (prodid_ggplot,prodid_ggplot,length) # rawdata_vi$prodID_ Ggplot<-prodid_ggplot # Site_ggplot<-rawdata_vi$site;site_ggplot<-reorder (Site_ggplot,site_ggplot, Length) # Rawdata_vi$site_ggplot<-site_ggplot # Ggplot (Subset (rawdata_vi,prodid!=9999), AES (X=prodid_ggplot)) + Geom_bar (Aes (Fill=prodid_ggplot)) # Ggplot (Subset (rawdata_vi,prodid!=9999), AES (X=prodid_ggplot,fill=factor ( insert_date)) +geom_bar (position = ' stack ') +labs (title= "Mobile Terminal ratio Column Chart") # Ggplot (Rawdata_vi,aes Factor (Insert_date)) +geom_bar (position = ' dodge ') +labs (title= "Mobile Terminal ratio Column Chart") # Ggplot (Rawdata_vi,aes, Fill=factor (insert_date))) +geom_bar (position = ' stack ') +labs (title= "Mobile Terminal ratio Column Chart") # Ggplot (Rawdata_vi,aes Ggplot,fill=factor (insert_date)) +geom_bar (position = ' dodge ') +labs (title= "Mobile Terminal ratio Column Chart") +facet_wrap (~insert_date,ncol=1) # str (RAWDATA_VI) # rawdata_vi_to_mysql<-data.frame (Rawdata_vi$key_table,rawdata_vi$insert_date, rawdata_vi$label,rawdata_vi$sessionid,rawdata_vi$stay_time,rawdata_vi$site,rawdata_vi$page_url,rawdata_vi$ ProdID) # conn <-Dbconnect (MySQL (), dbname = "Analyse_dev", username= "root", password= "pa123456!", host= " 202.69.27.239 ", port=8443) # Rawdata_vi_to_mysql<-subset (Rawdata_vi_to_mysql,rawdata_vi.prodid!=9999) # Dbwritetable (conn, "Rawdata_vi_to_mysql", Rawdata_vi_to_mysql) # dbdisconnect (conn) #