-- Tags: Hive practice notes — tables, settings, tasks, dates, bandwidth
-- ---------------------------------------------------------------------------
-- Hive practice script (ch03): complex types, database/table DDL,
-- external tables, CTAS/CTE, partitioned tables, and bucket tables.
-- Reconstructed from a whitespace-mangled extraction; values that were lost
-- in the original are marked TODO below.
-- ---------------------------------------------------------------------------

-- Internal (managed) table using Hive complex types.
CREATE TABLE employee (
  name         STRING,
  work_place   ARRAY<STRING>,                     -- access style: array_name[0]
  gender_age   STRUCT<gender:STRING, age:INT>,    -- access style: struct_name.col_name (like an HBase column family)
  skills_score MAP<STRING, INT>,                  -- access style: map_name[key]
  apart_title  MAP<STRING, ARRAY<STRING>>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';

-- More readable than plain DESCRIBE.
DESCRIBE FORMATTED employee;

LOAD DATA LOCAL INPATH '/home/centos/hive essential/ch03/employee.txt'
OVERWRITE INTO TABLE employee;

-- Query the whole array and individual elements.
SELECT work_place FROM employee;
SELECT
  work_place[0] AS col_1,
  work_place[1] AS col_2,
  work_place[2] AS col_3
FROM employee;

-- Query the whole struct and each field.
SELECT gender_age FROM employee;
SELECT gender_age.gender, gender_age.age FROM employee;

-- Query the whole map and individual keys.
SELECT skills_score FROM employee;
SELECT
  name,
  skills_score['DB']     AS db,
  skills_score['Perl']   AS perl,
  skills_score['Python'] AS python,
  skills_score['Sales']  AS sales,
  skills_score['HR']     AS hr
FROM employee;

-- Query the composite type (map of arrays).
SELECT apart_title FROM employee;
SELECT
  name,
  apart_title['Product'] AS product,
  apart_title['Test']    AS test,
  apart_title['COE']     AS coe,
  apart_title['Sales']   AS sales
FROM employee;

-- ---------------------------------------------------------------------------
-- DDL: database operations
-- ---------------------------------------------------------------------------
CREATE DATABASE IF NOT EXISTS myhivebook
COMMENT 'ch03 hive database in practice'
LOCATION '/hdfs/hive'                                    -- explicit HDFS path for the database
WITH DBPROPERTIES ('name' = 'Mengrui', 'date' = '2018-08-20');

SHOW DATABASES;
DESCRIBE DATABASE myhivebook;   -- print the metadata of the specified database
USE myhivebook;

DROP DATABASE IF EXISTS myhivebook;           -- only succeeds on an empty database
DROP DATABASE IF EXISTS myhivebook CASCADE;   -- also drops the tables it contains

ALTER DATABASE myhivebook SET DBPROPERTIES ('edited-by' = 'dog');
ALTER DATABASE myhivebook SET OWNER USER dog;

-- ---------------------------------------------------------------------------
-- DDL: table operations
-- ---------------------------------------------------------------------------
-- External table: dropping it removes metadata only, never the data files.
CREATE EXTERNAL TABLE external_employee (
  name         STRING,
  work_place   ARRAY<STRING>,
  gender_age   STRUCT<gender:STRING, age:INT>,
  skills_score MAP<STRING, INT>,
  apart_title  MAP<STRING, ARRAY<STRING>>
)
COMMENT 'This is an external table'   -- COMMENT must appear here; a different position is a syntax error
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE
LOCATION '/user/ch03/employee';
-- The LOCATION path must not contain unrelated subfolders or queries will fail;
-- Hive creates the path automatically if it does not exist.

LOAD DATA LOCAL INPATH '/home/centos/hive essential/ch03/employee.txt'
OVERWRITE INTO TABLE external_employee;

-- Temporary tables live only for the session:
-- CREATE TEMPORARY TABLE temporary_name ...;

-- CTAS: copies both the schema and the data into a new table.
CREATE TABLE ctas_employee AS
SELECT * FROM external_employee;

-- CTAS with CTEs: one male named 'Michael' plus all female names.
-- NOTE(review): r1 references r2, which is defined after it; Hive accepts
-- forward references between CTEs in the same WITH clause.
CREATE TABLE cte_employee AS
WITH
  r1 AS (SELECT name FROM r2 WHERE name = 'Michael'),
  r2 AS (SELECT name FROM employee WHERE gender_age.gender = 'Male'),
  r3 AS (SELECT name FROM employee WHERE gender_age.gender = 'Female')
SELECT * FROM r1
UNION ALL
SELECT * FROM r3;

SELECT * FROM cte_employee;

-- Creating an empty table:
-- CREATE TABLE empty_ctas_employee AS SELECT * FROM employee WHERE 1 = 2;
--   (CTAS launches mappers even for zero rows — slow, not recommended)
CREATE TABLE empty_like_employee LIKE employee;   -- LIKE copies metadata only; source may be a table or a view

SELECT COUNT(*) AS row_counts FROM employee;

-- Dropping an internal table removes the metadata and moves the data to .Trash.
DROP TABLE IF EXISTS empty_ctas_employee;

-- Remove all rows from an internal table.
TRUNCATE TABLE cte_employee;

-- ALTER statements change metadata only; the data must already match.
ALTER TABLE internal_employee RENAME TO empty_employee;
ALTER TABLE employee SET TBLPROPERTIES ('comment' = 'This is internal table');
ALTER TABLE employee SET SERDEPROPERTIES ('field.delim' = '$');
-- Hive does NOT auto-create this path; it must be an absolute HDFS path.
ALTER TABLE employee SET LOCATION 'hdfs://mycluster/user/hive/warehouse/new_employee';

-- TODO confirm: the partition literals below were lost in the original text.
ALTER TABLE external_employee PARTITION (year = 2018, month = 1) ENABLE NO_DROP;  -- protect partition from being dropped
ALTER TABLE external_employee ENABLE OFFLINE;   -- block queries against the data (metadata stays readable)

ALTER TABLE employee CONCATENATE;               -- merge small files; only RCFile/ORC formats supported
ALTER TABLE employee SET FILEFORMAT RCFILE;
ALTER TABLE employee SET FILEFORMAT TEXTFILE;
DESC employee;                                  -- check the column types

ALTER TABLE empty_employee
CHANGE COLUMN name employee_name STRING AFTER work_place;   -- rename and move the column
ALTER TABLE empty_employee ADD COLUMNS (wife STRING);       -- append a new column
ALTER TABLE empty_employee REPLACE COLUMNS (wife STRING);   -- replace ALL original columns with this one

-- ---------------------------------------------------------------------------
-- Partitioned tables: greatly reduce query time and bandwidth.
-- ---------------------------------------------------------------------------
CREATE TABLE partition_employee (
  name         STRING,
  work_place   ARRAY<STRING>,
  gender_age   STRUCT<gender:STRING, age:INT>,
  skills_score MAP<STRING, INT>,
  apart_title  MAP<STRING, ARRAY<STRING>>
)
PARTITIONED BY (year INT, month INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';

SHOW PARTITIONS partition_employee;

-- 1) A freshly created partitioned table has no partitions; add them manually.
--    TODO confirm: the year/month literals were lost in the original text.
ALTER TABLE partition_employee ADD
  PARTITION (year = 2018, month = 6)
  PARTITION (year = 2018, month = 7);

-- 2) Load data into a specific partition (LOCAL = from the local file system).
LOAD DATA LOCAL INPATH '/home/centos/hive essential/ch03/employee.txt'
OVERWRITE INTO TABLE partition_employee PARTITION (year = 2018, month = 7);

-- Before querying a strict-mode partitioned table without a partition filter:
-- SET hive.mapred.mode = nonstrict;

-- Drop a partition.
ALTER TABLE partition_employee DROP IF EXISTS PARTITION (year = 2018, month = 7);

-- ---------------------------------------------------------------------------
-- Bucket tables
-- ---------------------------------------------------------------------------
-- 1) Prepare another dataset and table for the bucket table.
CREATE TABLE employee_id (
  name         STRING,
  employee_id  INT,                               -- bucket column
  work_place   ARRAY<STRING>,
  gender_age   STRUCT<gender:STRING, age:INT>,
  skills_score MAP<STRING, INT>,
  apart_title  MAP<STRING, ARRAY<STRING>>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';

LOAD DATA LOCAL INPATH '/home/centos/hive essential/ch03/employee_id.txt'
OVERWRITE INTO TABLE employee_id;

-- 2) Create the bucket table. Choose 2^n buckets so each bucket is close to
--    the HDFS block size (~256 MB).
CREATE TABLE employee_id_buckets (
  name         STRING,
  employee_id  INT,                               -- bucket column
  work_place   ARRAY<STRING>,
  gender_age   STRUCT<gender:STRING, age:INT>,
  skills_score MAP<STRING, INT>,
  apart_title  MAP<STRING, ARRAY<STRING>>
)
CLUSTERED BY (employee_id) INTO 2 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';

-- 3) The number of reducers should equal the number of buckets;
--    hive.enforce.bucketing makes Hive handle this automatically.
SET map.reduce.tasks = 2;
SET hive.enforce.bucketing = true;

-- 4) Populate the buckets.
INSERT OVERWRITE TABLE employee_id_buckets
SELECT * FROM employee_id;

-- 5) Verify the bucket files in HDFS (shell, not HiveQL):
--    hdfs dfs -ls /user/hive/warehouse/employee_id_buckets

-- Views reduce query complexity and increase data security.
Reference books
Programming Hive
Apache Hive Essentials
Data definition and description