-- case1 ----========== order_created ==========--/*10703007267488 2014-05-01 06:01:12.334+0110101043505096 2014-05-01 07:28:12.342+ 0110103043509747 2014-05-01 07:50:12.33+0110103043501575 2014-05-01 09:27:12.33+0110104043514061 2014-05-01 09:03:12.324+01*/create EXTERNAL TABLE order_created ( orderNumber STRING , event_time string) row format delimited fields terminated by ' \ T ' location '/tmp/db_case1/order_created '; create external table order_created_partition ( orderNumber string , event_time string) partitioned by (event_month string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' t ' location '/tmp/db_Case1/order_created_partition '; create table order_created_dynamic_partition ( ordernumber string , event_time string) partitioned by (event_month string); insert into table order_created_dynamic_partition PARTITION (Event_month) select Ordernumber, event_time, substr (event_time, 1, 7) as event_month from order_created;set hive.exec.dynamic.partition.mode=nonstrict;/* hive.exec.dynamic.partition=false hive.exec.dynamic.partition.mode=strict hive.exec.max.dynamic.partitions.pernode=100 maximum number of dynamic partitions allowed to be created in each mapper/reducer node hive.exec.max.dynamic.partitions=1000 maximum number of dynamic partitions allowed to be created in total hive.exec.max.created.files=100000 Maximum number of HDFS files created by all mappers/reducers in a mapreduce job hive.error.on.empty.partition=false*/select input__file__name, ordernumber, event_time, block__offset__inside__file / (Length (ordernumber) + length (event_time) + 2) + 1 from order_created_dynamic_partition;select input__file__name, ordernumber, event_time, round (block__offset__inside__file / (Length (ordernumber) + length ( Event_time) + 2) + 1) from order_created_dynamic_partition;desc formatted order_created_dynamic_partition;desc formatted order_created_dynamic_partition partition (event_month= ' 2014-05 ‘); create table order_created_dynamic_partition_parquet ( orderNumber string , event_time string) partitioned by (event_month string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' t ' stored as parquet; msck repair table order_created_dynamic_partition_parquet;-- set to text File format, bug in hivealter table order_created_dynamic_partition_parquet PARTITION (event_month= ' 2014-06 ') SET SERDE ' Org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe '; alter table order_created_dynamic_partition_parquet partition (event_month= ' 2014-06 ') set fileformat textfile;-- impalaalter table order_created_dynamic_partition_ Parquet partition (event_month= ' 2014-06 ') SET FILEFORMAT textfile;-- set to parquet File format, hive <= 0.12alter table order_created_dynamic_partition_parquet PARTITION (event_month= ' 2014-07 ') SET SERDE ' parquet.hive.serde.ParquetHiveSerDe '; alter table order_created_dynamic_partition_parquet partition (event_month= ' 2014-07 ') SET FILEFORMAT INPUTFORMAT ' Parquet.hive.DeprecatedParquetInputFormat ' outputformat ' Parquet.hive.DeprecatedParquetOutputFormat ';-- impala or hive 0.13alter table order_created_dynamic_partition_parquet PARTITION (event_month= ' 2014-07 ') SET fileformat parquet;insert into table order_created_dynamic_partition_parquet partition (event_month= ' 2014-07 ') select ordernumber, event_time from order_ created;--========== order_picked ==========--/*10703007267488 2014-05-01 07:02:12.334+0110101043505096 2014-05-01 08:29:12.342+0110103043509747 2014-05-01 10:55:12.33+01*/create external table order_picked ( ordernumber string , event_time string) row format delimited fields terminated by ' \ t ' location '/tmp/db_case1/order_picked ';--========== order_shipped == ========--/*10703007267488 2014-05-01 10:00:12.334+0110101043505096 2014-05-01 18:39:12.342+01*/CREATE EXTERNAL TABLE order_shipped ( ordernumber string , event_time string) ROW FORMAT delimited fields terminated by ' \ t ' location '/tmp/db_case1/order_shipped ';--========= = order_received ==========--/*10703007267488 2014-05-02 12:12:12.334+01*/create external table order_received ( ordernumber string , event_time string) row format delimited fields terminated by ' \ t ' LOCATION ' /tmp/db_case1/order_received ';--========== order_cancelled ==========--/*10103043501575 2014-05-01 12:12:12.334+01*/CREATE EXTERNAL TABLE order_cancelled ( ordernumber string , event_time string) ROW FORMAT delimited fields terminated by ' \ t ' location '/tmp/db_case1/order_cancelled ';--======= ==============================--create table order_tracking asselect ordernumber , max (case when type_id= "order_created" THEN Event_time else&nBSP; ' 0 ' end) as order_created_ts , max (CASE WHEN type_id = "order_picked" THEN event_time ELSE ' 0 ' end) as order_ Picked_ts , max (case when type_id= "order_shipped" THEN event_time ELSE ' 0 ' end) AS order_shipped_ts , max (case when type_id= "order_received" THEN event_time ELSE " 0 ' end) as order_received_ts , max (CASE WHEN type_ Id= "order_cancelled" THEN event_time ELSE ' 0 ' end) as order_cancelled_ tsfrom ( select orderNumber, "order_created" as Type_id, event_time from order_created union all select ordernumber,&nbsP; " Order_picked " as type_id, event_time FROM order_picked union all select ordernumber, "order_shipped" as Type_id, event_time from order_shipped union all select orderNumber, "Order_received" as type_id, event_time from order_ received union all select ordernumber, "Order_cancelled" as type_id, event_time from order_cancelled) ugroup by orderNumber; select * from order_tracking order by order_created_ts limit 5;--===== ================================--create table order_tracking_join asselect t1.ordernumber , t1.event_time as order_created_ts , t2.event_time&Nbsp;as order_picked_ts , t3.event_time as order_shipped_ts , t4.event_time as order_received_ts , t5.event_time as order_cancelled_tsfrom ( select ordernumber, max (event_time) as event_time from order_created group by ordernumber ) t1left outer join ( select ordernumber, max (event_time) as event_time from order_picked group by ordernumber) t2on t1.ordernumber = t2.ordernumberleft outer join ( select ordernumber, max (event_time) as event_time from order_shipped group by ordernumber ) t3on t1.ordernumber = t3.ordernumberleft outer join ( select ordernumber, max (event_time) &NBSp;as event_time from order_received group by ordernumber) t4on t1.ordernumber = t4.ordernumberleft outer join ( select ordernumber, max (event_time) as event_time from order_cancelled group by OrderNumber) t5on t1.ordernumber = t5.ordernumber;select * from order_ tracking_join order by order_created_ts limit 5;--=================================== ==--select ordernumber , order_created_ts , order_picked_ts , order_shipped_ts , order_received_ts , order_cancelled_ts from order_ tracking where order_created_ts != ' 0 ' AND order_cancelled_ts = ' 0 ' AND (&NBSP;&NBSP;&NBsp; coalesce (Unix_timestamp (order_picked_ts, ' Yyyy-mm-dd hh:mm:ss. S '), 0) - unix_timestamp (order_created_ts, ' Yyyy-mm-dd hh:mm:ss. S ') > 2 * 60 * 60 or coalesce ( Unix_timestamp (order_shipped_ts, ' Yyyy-mm-dd hh:mm:ss. S '), 0) - unix_timestamp (order_created_ts, ' Yyyy-mm-dd hh:mm:ss. S ') > 4 * 60 * 60 or coalesce ( Unix_timestamp (order_shipped_ts, ' Yyyy-mm-dd hh:mm:ss. S '), 0) - unix_timestamp (order_created_ts, ' Yyyy-mm-dd hh:mm:ss. S ') > 48 * 60 * 60 );select ordernumber , order_created_ts , order_picked_ts , order_shipped_ts , order_received_tS , order_cancelled_ts from order_tracking_join where order_created_ts IS NOT NULL AND order_cancelled_ts IS NULL AND ( coalesce (Unix_timestamp (order_picked_ts, ' yyyy-mm-dd HH:mm:ss. S '), 0) - unix_timestamp (order_created_ts, ' Yyyy-mm-dd hh:mm:ss. S ') > 2 * 60 * 60 or coalesce ( Unix_timestamp (order_shipped_ts, ' Yyyy-mm-dd hh:mm:ss. S '), 0) - unix_timestamp (order_created_ts, ' Yyyy-mm-dd hh:mm:ss. S ') > 4 * 60 * 60 or coalesce ( Unix_timestamp (order_shipped_ts, ' Yyyy-mm-dd hh:mm:ss. S '), 0) - unix_timestamp (order_created_ts, ' Yyyy-mm-dd hh:mm:ss. S ') > 48 * 60 * 60 );
This article is from the "thick, thin hair, ax" blog, please be sure to keep this source http://tianxingzhe.blog.51cto.com/3390077/1717573
Hive ETL Order Logistics system SQL