== Use regular expressions ==
hive (ods)> select symbol, `price.*` from stocks;
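This selects symbol plus every column whose name matches the Java regular expression price.*; note the pattern goes in backticks, not single quotes. A minimal sketch, assuming a Hive version (0.13+) where quoted identifiers are on by default and regex column specification must be re-enabled first:

set hive.support.quoted.identifiers=none;  -- restore regex column specification
select symbol, `price.*` from stocks;      -- all columns whose names start with "price"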
== Table structure ==
hive (ods)> desc emp1;
OK
col_name data_type comment
name string
salary float
subordinates array<string>
deductions map<string,float>
address struct<street:string,city:string,state:string,zip:int>
country string
state string
# Partition Information
# col_name data_type comment
country string
state string
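For reference, a DDL sketch that would produce this structure; the field and collection delimiters are assumptions, not taken from the original session:

create table emp1 (
  name          string,
  salary        float,
  subordinates  array<string>,
  deductions    map<string,float>,
  address       struct<street:string, city:string, state:string, zip:int>
)
partitioned by (country string, state string)  -- matches the partition information above
row format delimited
  fields terminated by '\t'                    -- assumed delimiter
  collection items terminated by ','           -- assumed delimiter
  map keys terminated by ':';                  -- assumed delimiter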
== Querying elements of arrays, maps, and structs ==
hive (ods)> select name, subordinates[0], deductions["Shebao"], address.city from emp1;
OK
name _c1 _c2 city
Lucy alucy 100.0 Beijing
hive (ods)> select * from emp1;
OK
emp1.name emp1.salary emp1.subordinates emp1.deductions emp1.address emp1.country emp1.state
Lucy 10000.0 ["Alucy"] {"Shebao":100.0} {"street":"Xidan","city":"Beijing","state":"Dong","zip":100000} BJ Shoudu
Time taken: 0.137 seconds, Fetched: 1 row(s)
== Calculations ==
hive (ods)> select upper(name), salary, deductions["Shebao"], round(salary * (1 - deductions["Shebao"])) from emp1;
OK
_c0 salary _c2 _c3
LUCY 10000.0 100.0 -990000.0
Time taken: 0.187 seconds, Fetched: 1 row(s)
Note that deductions["Shebao"] holds an absolute amount (100.0) rather than a rate, which is why salary * (1 - deductions["Shebao"]) comes out to -990000.0.
== Aggregate queries ==
hive (ods)> select count(*), avg(salary) from emp1;
== Set parameters to improve aggregation performance ==
hive (ods)> set hive.map.aggr=true;
hive (ods)> select count(distinct symbol) from emp1;
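hive.map.aggr=true enables map-side partial aggregation: each mapper pre-aggregates with an in-memory hash map, so less data is shuffled to the reducers at the cost of mapper memory. A related knob, with values that are illustrative defaults rather than taken from the original session:

set hive.map.aggr=true;                         -- pre-aggregate in the mappers
set hive.groupby.mapaggr.checkinterval=100000;  -- rows processed before checking hash-map memory use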
Table-generating functions
Row split in Hive: lateral view explode(col3) col3 as name
explode(array): generates one row for each element in the array.
explode(map): generates one row for each key-value pair in the map; the key becomes one column and the value another.
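A minimal sketch of explode on a map, using the emp1 table described earlier; the alias names dkey and dvalue are arbitrary:

select explode(deductions) as (dkey, dvalue) from emp1;
-- dkey    dvalue
-- Shebao  100.0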
hive (ods)> create table explode_test (
> col1 string,
> col2 string,
> col3 string
> )
> row format delimited fields terminated by '\t'
> stored as textfile;
OK
Time taken: 0.207 seconds
hive (ods)> load data local inpath '/home/hadoop/study_hadoop/explode.txt' into table explode_test;
Loading data to table ods.explode_test
Table ods.explode_test stats: [numFiles=1, totalSize=20]
OK
Time taken: 0.622 seconds
hive (ods)> select * from explode_test;
OK
explode_test.col1 explode_test.col2 explode_test.col3
a b 1,2,3
c d 4,5,6
Time taken: 0.121 seconds, Fetched: 2 row(s)
== Turning each array element into a row ==
Method 1: split the delimited string column, then explode it.
hive (ods)> select col1, col2, name
> from explode_test
> lateral view explode(split(col3, ',')) col3 as name;
OK
col1 col2 name
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
Time taken: 0.124 seconds, Fetched: 6 row(s)
Method 2: store the column as array<int>, so explode can be applied to it directly.
hive (ods)> create table hzl_test (
> col1 string,
> col2 string,
> col3 array<int>
> )
> row format delimited
> fields terminated by '\t'
> collection items terminated by ',';
OK
Time taken: 0.135 seconds
hive (ods)> load data local inpath '/home/hadoop/study_hadoop/explode.txt' into table hzl_test;
Loading data to table ods.hzl_test
Table ods.hzl_test stats: [numFiles=1, totalSize=20]
OK
Time taken: 0.466 seconds
hive (ods)> select * from hzl_test;
OK
hzl_test.col1 hzl_test.col2 hzl_test.col3
a b [1,2,3]
c d [4,5,6]
Time taken: 0.117 seconds, Fetched: 2 row(s)
hive (ods)> select col1, col2, name
> from hzl_test
> lateral view explode(col3) col3 as name;
OK
col1 col2 name
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
Time taken: 0.12 seconds, Fetched: 6 row(s)
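If the position of each element is also needed, Hive 0.13+ provides posexplode, which emits an index column alongside the value. A sketch against the same table:

select col1, col2, pos, name
from hzl_test
lateral view posexplode(col3) t as pos, name;
-- col1 col2 pos name
-- a    b    0   1
-- a    b    1   2
-- ...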
== Addendum ==
hive (ods)> select t.list[0], t.list[1], t.list[2] from (
> select split(col3, ',') as list from explode_test) t;
OK
_c0 _c1 _c2
1 2 3
4 5 6
== Getting the array length with size() ==
hive (ods)> select size(split(col3, ',')) list from explode_test;
OK
list
3
3
Combining rows into a single column value in Hive: concat_ws(',', collect_set(col3))
hive (ods)> select * from tmp_jiangzl_test;
OK
tmp_jiangzl_test.col1 tmp_jiangzl_test.col2 tmp_jiangzl_test.col3
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
Time taken: 0.116 seconds, Fetched: 6 row(s)
hive (ods)> select col1, col2, concat_ws(',', collect_set(col3))
> from tmp_jiangzl_test
> group by col1, col2;
col1 col2 _c2
a b 1,2,3
c d 4,5,6
Time taken: 34.791 seconds, Fetched: 2 row(s)
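Keep in mind that collect_set de-duplicates the grouped values; if duplicates must be preserved, Hive 0.13+ offers collect_list with the same usage. A sketch:

select col1, col2, concat_ws(',', collect_list(col3))
from tmp_jiangzl_test
group by col1, col2;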
parse_url_tuple
hive (ods)> select * from t_url;
OK
t_url.f1 t_url.f2
url1 http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1
url2 https://cwiki.apache.org/confluence/display/hive/languagemanual+udf#languagemanualudf-getjsonobject
url3 https://www.google.com.hk/#hl=zh-cn&newwindow=1&safe=strict&q=hive+translate+example&oq=hive+translate+example&gs_l=serp.3...10174.11861.6.12051.8.8.0.0.0.0.132.883.0j7.7.0...0.0...1c.1j4.8.serp.0b9c1t_n0hs&bav=on.2,or.&bvm=bv.44770516,d.agc&fp=e13e41a6b9dab3f6&biw=1241&bih=589
Time taken: 0.122 seconds, Fetched: 3 row(s)
hive (ods)> select f1, b.* from t_url lateral view parse_url_tuple(f2, 'HOST', 'PATH', 'querty', 'querty:k1') b as host, path, querty, querty_id;
OK
f1 b.host b.path b.querty b.querty_id
url1 facebook.com /path1/p.php NULL NULL
url2 cwiki.apache.org /confluence/display/hive/languagemanual+udf NULL NULL
url3 www.google.com.hk / NULL NULL
Time taken: 0.142 seconds, Fetched: 3 row(s)
Note: 'querty' is a misspelling of 'QUERY', which is why those two columns come back NULL for every row; with 'QUERY' and 'QUERY:k1', the first row would return k1=v1&k2=v2 and v1.
Lateral view is the construct Hive provides for working with UDTFs; it solves the problem that a UDTF cannot be combined with additional SELECT columns. When we want to split a column of a Hive table, converting one row into N rows, Hive does not allow extra SELECT expressions alongside a bare UDTF call, so lateral view is used to join the generated rows back to the source columns.
get_json_object
hive (ods)> select get_json_object(t_json.f2, '$.owner') from t_json;
OK
_c0
amy1
amy2
amy3
Time taken: 0.106 seconds, Fetched: 3 row(s)
hive (ods)> select * from t_json;
OK
t_json.f1 t_json.f2 t_json.f3
first {"store": {"fruit": [{"Weight": 8, "type": "Apple"} {"Weight": 9, "type": "Pear"}], "bicycle": {"Price": 19.951, "color": "Red1"}}, "email": "Amy@only_for_json_udf_ Test.net "," owner ":" Amy1 "} third
first {" store ": {" fruit ": [{" Weight ": 9," type ":" Apple "},{" weight " : "Type": "Pear"}], "bicycle": {"Price": 19.952, "color": "Red2"}}, "email": "Amy@only_for_json_udf_test.net", "owner ":" Amy2 "} third
first {" store ": {" fruit ": [{" Weight ": Ten," type ":" Apple "},{" weight ": 911," type ":" Pear "}]," bicycle ": {" Price ": 19.953," color ":" Red3 "}}," email ":" Amy@only_for_json_udf_test.net "," owner ":" Amy3 "} Third time
taken:0.102 seconds, Fetched:3 row (s)
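The JSON path argument of get_json_object supports $ (root object), . (child access), [] (array subscript), and * (wildcard). A sketch against the rows above:

select get_json_object(f2, '$.store.bicycle.price'),
       get_json_object(f2, '$.store.fruit[0].type')
from t_json;
-- 19.951  apple   (values from the first row)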
URL parsing function: parse_url
hive (ods)> select parse_url('https://www.baidu.com/s?cl=3&tn=baidutop10&fr=top1000&wd=%e8%bf%aa%e5%a3%ab%e5%b0%bc%e6%94%b6%e8%b4%ad%e7%a6%8f%e5%85%8b%e6%96%af&rsv_idx=2', 'HOST') from dual;
OK
_c0
www.baidu.com
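Besides HOST, parse_url accepts PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, and USERINFO, and QUERY can take a key name to pull a single parameter's value. A sketch reusing the facebook URL from the t_url table above:

select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1') from dual;
-- v1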
Other built-in functions
Regular expressions
Regular expression replace function: regexp_replace
hive (ods)> select regexp_replace('foobar', 'oo|ar', '') from dual;
OK
_c0
fb
Time taken: 0.112 seconds, Fetched: 1 row(s)
hive (ods)> select regexp_replace('foobar', 'oo|ar', '-') from dual;
OK
_c0
f-b-
Time taken: 0.105 seconds, Fetched: 1 row(s)
Regular expression extraction function: regexp_extract
hive (ods)> select regexp_extract('foothebar', 'foo(.*?)(bar)', 1) from dual;
OK
_c0
the
Time taken: 0.105 seconds, Fetched: 1 row(s)
hive (ods)> select regexp_extract('foothebar', 'foo(.*?)(bar)', 0) from dual;
OK
_c0
foothebar
Time taken: 0.104 seconds, Fetched: 1 row(s)
hive (ods)> select regexp_extract('foothebar', 'foo(.*?)(bar)', 2) from dual;
OK
_c0
bar
The third argument selects the capture group: 0 returns the whole match, 1 the first group, and 2 the second.
Limit Statement
hive (ods)> select * from staged_employees order by id limit 3;
Nested SELECT statements
hive (ods)> from (
> select upper(emp1.name) as name, emp1.subordinates[0] as sub, emp1.salary, emp1.deductions["Shebao"] as shebao, emp1.address.city
> from emp1) e
> select e.name, e.sub, e.salary, e.shebao;
OK
e.name e.sub e.salary e.shebao
LUCY alucy 10000.0 100.0
Time taken: 0.166 seconds, Fetched: 1 row(s)
Case
hive (ods)> select * from emp1;
OK
emp1.name emp1.salary emp1.subordinates emp1.deductions emp1.address emp1.country emp1.state
Lucy 10000.0 ["Alucy"] {"Shebao":100.0} {"street":"Xidan","city":"Beijing","state":"Dong","zip":100000} BJ Shoudu
Time taken: 0.109 seconds, Fetched: 1 row(s)
hive (ods)> select name, salary,
> case when salary < 1000 then 'low'
>      when salary >= 1000 and salary <= 5000 then 'middle'
>      when salary > 5000 and salary < 10000 then 'high'
>      else 'very high'
> end as bracket
> from emp1;
OK
name salary bracket
Lucy 10000.0 very high
Time taken: 0.3 seconds, Fetched: 1 row(s)
Set local mode:
hive (ods)> set hive.exec.mode.local.auto=true;
Column aliases
You cannot reference a column alias in the WHERE clause, but you can work around this with a nested SELECT statement, as sketched below.
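A minimal sketch of the workaround against emp1; the 0.9 factor and the alias net_salary are illustrative only:

-- Fails: WHERE cannot see the alias net_salary
-- select name, salary * 0.9 as net_salary from emp1 where net_salary > 5000;
-- Works: compute the alias in an inner query, then filter in the outer one
select e.name, e.net_salary
from (select name, salary * 0.9 as net_salary from emp1) e
where e.net_salary > 5000;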
LIKE and RLIKE
LIKE matches with SQL wildcards (% and _), while RLIKE matches against a Java regular expression.
hive (ods)> select emp1.address.street from emp1 where emp1.address.street like '%dong%';
OK
street
dongdan
dongdan
Time taken: 0.103 seconds, Fetched: 2 row(s)
hive (ods)> select emp1.address.street from emp1 where emp1.address.street rlike '.*dong|houhai.*';
OK
street
dongdan
dongdan
houhai
Time taken: 0.164 seconds, Fetched: 3 row(s)
GROUP BY with HAVING
hive (ods)> select aaa, symbol, ymd, count(*) from stocks1 group by aaa, symbol, ymd having count(*) > 1;
Query ID = hadoop_20171218101126_b400d584-6699-447f-8011-1aeb3019a1de
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1513562135174_0002, Tracking URL = http://master:8088/proxy/application_1513562135174_0002/
Kill Command = /home/hadoop/hadoop-2.6.4/bin/hadoop job -kill job_1513562135174_0002
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2017-12-18 10:11:38,117 Stage-1 map = 0%, reduce = 0%
2017-12-18 10:11:49,277 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.72 sec
2017-12-18 10:12:02,299 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 4.86 sec
MapReduce Total cumulative CPU time: 4 seconds