Hive ETL Order Logistics system SQL

Source: Internet
Author: User

-- case1 ----========== order_created ==========--/*10703007267488     2014-05-01 06:01:12.334+0110101043505096    2014-05-01 07:28:12.342+ 0110103043509747    2014-05-01 07:50:12.33+0110103043501575     2014-05-01 09:27:12.33+0110104043514061    2014-05-01 09:03:12.324+01*/create  EXTERNAL TABLE order_created  (    orderNumber STRING   ,  event_time  string) row format delimited fields terminated by  ' \ T ' location  '/tmp/db_case1/order_created '; create external table order_created_partition  (    orderNumber  string  , event_time  string) partitioned by  (event_month string) ROW  FORMAT DELIMITED FIELDS TERMINATED BY  ' t ' location  '/tmp/db_Case1/order_created_partition '; create table order_created_dynamic_partition  (    ordernumber string   , event_time  string) partitioned by  (event_month string); insert  into table order_created_dynamic_partition PARTITION  (Event_month) select  Ordernumber, event_time, substr (event_time, 1, 7)  as event_month from  order_created;set hive.exec.dynamic.partition.mode=nonstrict;/*     hive.exec.dynamic.partition=false    hive.exec.dynamic.partition.mode=strict     hive.exec.max.dynamic.partitions.pernode=100    maximum number of  dynamic partitions allowed to be created in each mapper/reducer  node    hive.exec.max.dynamic.partitions=1000            maximum number of dynamic partitions allowed to be created  in total    hive.exec.max.created.files=100000               Maximum number of HDFS files  created by all mappers/reducers in a mapreduce job     hive.error.on.empty.partition=false*/select input__file__name, ordernumber, event_time,  block__offset__inside__file /  (Length (ordernumber)  + length (event_time)  + 2)  + 1 from order_created_dynamic_partition;select input__file__name, ordernumber,  event_time, round (block__offset__inside__file /  (Length (ordernumber)  + length ( Event_time)  + 2)  + 1)  from order_created_dynamic_partition;desc formatted  order_created_dynamic_partition;desc formatted order_created_dynamic_partition partition  (event_month= ' 2014-05 ‘); create table order_created_dynamic_partition_parquet  (    orderNumber  string  , event_time  string) partitioned by  (event_month string) ROW  FORMAT DELIMITED FIELDS TERMINATED BY  ' t ' stored as parquet; msck repair table order_created_dynamic_partition_parquet;-- set to text  File format, bug in hivealter table order_created_dynamic_partition_parquet  PARTITION  (event_month= ' 2014-06 ')  SET SERDE  ' Org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe '; alter table order_created_dynamic_partition_parquet partition  (event_month= ' 2014-06 ')  set fileformat textfile;-- impalaalter table order_created_dynamic_partition_ Parquet partition  (event_month= ' 2014-06 ')  SET FILEFORMAT textfile;-- set to parquet  File format, hive <= 0.12alter table order_created_dynamic_partition_parquet  PARTITION  (event_month= ' 2014-07 ')  SET SERDE  ' parquet.hive.serde.ParquetHiveSerDe '; alter table order_created_dynamic_partition_parquet partition  (event_month= ' 2014-07 ')  SET FILEFORMAT INPUTFORMAT  ' Parquet.hive.DeprecatedParquetInputFormat '  outputformat   ' Parquet.hive.DeprecatedParquetOutputFormat ';-- impala or hive 0.13alter table  order_created_dynamic_partition_parquet PARTITION  (event_month= ' 2014-07 ')  SET  fileformat parquet;insert into table order_created_dynamic_partition_parquet  partition  (event_month= ' 2014-07 ')  select ordernumber, event_time from order_ created;--========== order_picked ==========--/*10703007267488    2014-05-01 07:02:12.334+0110101043505096     2014-05-01 08:29:12.342+0110103043509747    2014-05-01  10:55:12.33+01*/create external table order_picked  (    ordernumber  string  , event_time  string) row format delimited fields  terminated by  ' \ t ' location  '/tmp/db_case1/order_picked ';--========== order_shipped == ========--/*10703007267488    2014-05-01 10:00:12.334+0110101043505096     2014-05-01 18:39:12.342+01*/CREATE EXTERNAL TABLE order_shipped  (     ordernumber string  , event_time  string) ROW FORMAT  delimited fields terminated by  ' \ t ' location  '/tmp/db_case1/order_shipped ';--========= = order_received ==========--/*10703007267488    2014-05-02 12:12:12.334+01*/create external  table order_received  (    ordernumber string  , event_time   string) row format delimited fields terminated by  ' \ t ' LOCATION  ' /tmp/db_case1/order_received ';--========== order_cancelled ==========--/*10103043501575     2014-05-01 12:12:12.334+01*/CREATE EXTERNAL TABLE order_cancelled  (     ordernumber string  , event_time  string) ROW FORMAT  delimited fields terminated by  ' \ t ' location  '/tmp/db_case1/order_cancelled ';--======= ==============================--create table order_tracking asselect ordernumber      , max (case when type_id= "order_created"    THEN  Event_time else&nBSP; ' 0 '  end)  as order_created_ts     , max (CASE WHEN type_id = "order_picked"     THEN event_time ELSE  ' 0 '  end)  as order_ Picked_ts     , max (case when type_id= "order_shipped"     THEN event_time ELSE  ' 0 '  end)  AS order_shipped_ts      , max (case when type_id= "order_received"   THEN event_time ELSE  " 0 '  end)  as order_received_ts     , max (CASE WHEN type_ Id= "order_cancelled"  THEN event_time ELSE  ' 0 '  end)  as order_cancelled_ tsfrom  (    select orderNumber,  "order_created"    as  Type_id, event_time from order_created  union all    select  ordernumber,&nbsP; " Order_picked "    as type_id, event_time FROM order_picked   union all    select ordernumber,  "order_shipped"    as  Type_id, event_time from order_shipped  union all    select  orderNumber,  "Order_received"   as type_id, event_time from order_ received  union all    select ordernumber,  "Order_cancelled"  as type_id, event_time from order_cancelled)  ugroup by orderNumber; select * from order_tracking order by order_created_ts limit 5;--===== ================================--create table order_tracking_join asselect t1.ordernumber      , t1.event_time as order_created_ts      ,  t2.event_time&Nbsp;as order_picked_ts     , t3.event_time as order_shipped_ts      , t4.event_time as order_received_ts      , t5.event_time as order_cancelled_tsfrom  (  select ordernumber,  max (event_time)  as event_time from order_created group by ordernumber )  t1left outer join  (  select ordernumber, max (event_time)  as  event_time from order_picked group by ordernumber)  t2on  t1.ordernumber = t2.ordernumberleft outer join  (  select ordernumber,  max (event_time)  as event_time from order_shipped group by ordernumber )  t3on t1.ordernumber = t3.ordernumberleft outer join  (  select  ordernumber, max (event_time) &NBSp;as event_time from order_received group by ordernumber)  t4on  t1.ordernumber = t4.ordernumberleft outer join  (  select ordernumber,  max (event_time)  as event_time from order_cancelled group by  OrderNumber)  t5on t1.ordernumber = t5.ordernumber;select * from order_ tracking_join order by order_created_ts limit 5;--=================================== ==--select ordernumber     , order_created_ts      ,  order_picked_ts     , order_shipped_ts     ,  order_received_ts     , order_cancelled_ts  from order_ tracking where order_created_ts !=  ' 0 '  AND order_cancelled_ts =  ' 0 '    AND  (&NBSP;&NBSP;&NBsp; coalesce (Unix_timestamp (order_picked_ts,  ' Yyyy-mm-dd hh:mm:ss. S '),  0)  - unix_timestamp (order_created_ts,  ' Yyyy-mm-dd hh:mm:ss. S ')  > 2 * 60 * 60    or    coalesce ( Unix_timestamp (order_shipped_ts,  ' Yyyy-mm-dd hh:mm:ss. S '),  0)  - unix_timestamp (order_created_ts,  ' Yyyy-mm-dd hh:mm:ss. S ')  > 4 * 60 * 60    or    coalesce ( Unix_timestamp (order_shipped_ts,  ' Yyyy-mm-dd hh:mm:ss. S '),  0)  - unix_timestamp (order_created_ts,  ' Yyyy-mm-dd hh:mm:ss. S ')  > 48 * 60 * 60   );select ordernumber      , order_created_ts     , order_picked_ts      , order_shipped_ts     , order_received_tS     , order_cancelled_ts  from order_tracking_join where  order_created_ts IS NOT NULL AND order_cancelled_ts IS NULL    AND  (    coalesce (Unix_timestamp (order_picked_ts,  ' yyyy-mm-dd  HH:mm:ss. S '),  0)  - unix_timestamp (order_created_ts,  ' Yyyy-mm-dd hh:mm:ss. S ')  > 2 * 60 * 60    or    coalesce ( Unix_timestamp (order_shipped_ts,  ' Yyyy-mm-dd hh:mm:ss. S '),  0)  - unix_timestamp (order_created_ts,  ' Yyyy-mm-dd hh:mm:ss. S ')  > 4 * 60 * 60    or    coalesce ( Unix_timestamp (order_shipped_ts,  ' Yyyy-mm-dd hh:mm:ss. S '),  0)  - unix_timestamp (order_created_ts,  ' Yyyy-mm-dd hh:mm:ss. S ')  > 48 * 60 * 60   ); 


This article is from the "thick, thin hair, ax" blog, please be sure to keep this source http://tianxingzhe.blog.51cto.com/3390077/1717573

Hive ETL Order Logistics system SQL

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.