Hive ETL's advertising industry SQL

Source: Internet
Author: User
Tags explode

-- case2 ----========== click_log ==========--/*11    ad_101     2014-05-01 06:01:12.334+0122    ad_102     2014-05-01 07:28:12.342+0133    ad_103    2014-05-01  07:50:12.33+0111    ad_104    2014-05-01 09:27:12.33+0122     ad_103    2014-05-01 09:03:12.324+0133    ad_ 102    2014-05-02 19:10:12.343+0111    ad_101     2014-05-02 09:07:12.344+0135    ad_105    2014-05-03  11:07:12.339+0122    ad_104    2014-05-03 12:59:12.743+0177     ad_103    2014-05-03 18:04:12.355+0199    ad_ 102    2014-05-04 00:36:39.713+0133    ad_101    2014-05-04 19:10:12.343+ 0111    ad_101    2014-05-05 09:07:12.344+0135     ad_102    2014-05-05 11:07:12.339+0122    ad_103     2014-05-05 12:59:12.743+0177    ad_104     2014-05-05 18:04:12.355+0199    ad_105    2014-05-05  20:36:39.713+01*/create external table click_log  (    cookie_id   STRING  , ad_id      STRING  , ts          string) row format delimited fields  terminated by  ' \ t ' location  '/tmp/db_case2/click_log '; Select cookie_id, collect_set (AD _ID)  as ordersfrom click_log--where ts >  ' 2014-05-02 ' Group by cookie_id;select cookie_id, group_ Concat (ad_id,  ' | ')  as ordersfrom click_log--where ts >  ' 2014-05-02 ' group by cookie_id ;--========== ad_list ==========--/*ad_101    http://abcn.net/     catalog8|catalog1ad_102    http://www.abcn.net/    catalog6| catalog3ad_103    http://fxlive.de/    catalog7ad_104     http://fxlive.fr/    catalog5|catalog1|catalog4|catalog9ad_105     http://fxlive.eu/    */CREATE EXTERNAL TABLE ad_list  (     ad_id    STRING  , url       STRING  , catalogs array<STRING>) Row format delimitedfields  terminated BY  ' t ' collection items terminated by  ' | location  '/tmp/db_case2/ad_list '; create external table ad_list_string  (   ad_id     string , url      string , catalogs string) ROW  format delimited fields terminated by  ' \ t ' location  '/tmp/db_case2/ad_list '; select click.cookie_id, click.ad_id, click.amount, ad_list_string.catalogs as  ordersfrom  (  select cookie_id, ad_id, count (1)  as amount from  click_log group by cookie_id, ad_id)  clickjoin ad_list_stringon  (ad_ LIST_STRING.AD_ID&NBSP;=&NBSP;CLICK.AD_ID); select ad_id, catalog from ad_list lateral  view outer explode (catalogs)  t as catalog;select ad_id, collect_set ( Catalog)  from ad_lisT lateral view outer explode (catalogs)  t as catalog group by  ad_id;select click.cookie_id, ad.catalog from click_log clickleft outer  join  (  select ad_id, catalog from ad_list lateral view  outer explode (catalogs)  t as catalog)  adon  (click.ad_id = ad.ad_ ID); Create table cookie_cats asselect click.cookie_id, ad.catalog, count (1)  as weight from click_log clickleft outer join  (  select  ad_id, catalog from ad_list lateral view outer explode (Catalogs)  t as catalog)  adon  (click.ad_id = ad.ad_id) Group by click.cookie _id, ad.catalogorder by cookie_id, weight desc;select cookie_id, collect_ Set (catalog)  from cookie_cats group by cookie_id; -- where catalog is not nullselect  Cookie_id, group_concat (catalog,  ' | ')  from cookie_cats group by cookie_id; -- impala group_concat


This article is from the "thick, thin hair, ax" blog, please be sure to keep this source http://tianxingzhe.blog.51cto.com/3390077/1717577

Hive ETL's advertising industry SQL

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.