Ydb Data de-weight

Source: Internet
Author: User
Tags hadoop fs

1. Create Ydb Export Table Vehiclepass_txt_export
Create External Table Vehiclepass_txt_export (
RowKey string, RegionID string, Cityid string, Equipmentcode string, Directioncode string,lanecode string,inouttownflag s Tring,tollcode String,communityid string,tolltypecode string,datacompanycode string,vehicleplatesrc string,speed int , Time String,url string,year string,month string,day string,hour string,week string,dataclassify string,vehiclePlate String,platecolor string,vehiclecolor string,brand string,childbrand string,model string,species string,manufacturer String,headortail string,brandyear string,reliability int,vehicletypecode string,vehicleposition String, Firsttimeincity int,firsttimeincounty int,firsttimeincommunity Int,iword string,ivehicleplate string,analysisData String
) Partitioned by (' Month1 ' string)
Location '/data/mr/vehiclepass_txt_export ';
2. Export the data from the YDB table Vehiclepass partition 201603 to the external table vehiclepass_export_txt
Insert Overwrite table Vehiclepass_txt_export partition (month1= ' 201604 ')
Select
R[0] as RowKey,
R[1] as RegionID,
R[2] as Cityid,
R[3] as Equipmentcode,
R[4] as Directioncode,
R[5] as Lanecode,
R[6] as Inouttownflag,
R[7] as Tollcode,
R[8] as Communityid,
R[9] as Tolltypecode,
R[10] as Datacompanycode,
R[11] as VEHICLEPLATESRC,
R[12] as speed,
R[13] As time,
R[14] as URL,
R[15] as year,
R[16] as month,
R[17] as day,
R[18] as hour,
R[19] as week,
R[20] as Dataclassify,
R[21] as Vehicleplate,
R[22] as Platecolor,
R[23] as Vehiclecolor,
R[24] as Brand,
R[25] as Childbrand,
R[26] as model,
r[27] as species,
R[28] as manufacturer,
R[29] as Headortail,
R[30] as Brandyear,
R[31] as reliability,
R[32] as Vehicletypecode,
R[33] as Vehicleposition,
R[34] as firsttimeincity,
R[35] as Firsttimeincounty,
R[36] as firsttimeincommunity,
R[37] as Iword,
R[38] as Ivehicleplate,
R[39] as Analysisdata
From Ydb where Ysql (' From ', ' select Rowkey,regionid,cityid,equipmentcode,directioncode,lanecode,inouttownflag, Tollcode,communityid,tolltypecode,datacompanycode,vehicleplatesrc,speed,time,url,year,month,day,hour,week, Dataclassify,vehicleplate,platecolor,vehiclecolor,brand,childbrand,model,species,manufacturer,headortail, Brandyear,reliability,vehicletypecode,vehicleposition,firsttimeincity,firsttimeincounty,firsttimeincommunity, Iword,ivehicleplate,analysisdata from vehiclepass where ydbpartion = \ ' 201604\ ', ' segment ');

# # #删除导出表指定分区数据ALTER TABLE vehiclepass_txt_export DROP IF EXISTS PARTITION (month1= ' 201603 ');
3. Save the data in table Vehiclepass_export_txt 201603 to HDFs
/hadoop/bin/hadoop jar Dereplicationyunnan.jar hdfs://10.166.112.190:9000/data/mr/vehiclepass_txt_export/month1= 201603 hdfs://10.166.112.190:9000/data/mr/vehiclepass/201603
To back up the data after the deduplication
Hadoop fs-cp-f/data/mr/vehiclepass/201603/data/mr/vehiclepass_bak
4. Import the deduplication data into the Hive table vehiclepass_hive (and automatically remove the/data/mr/vehiclepass/201603/part-r-00000)
Load data inpath '/data/mr/vehiclepass/201603/part-r-00000 ' into table vehiclepass_hive partition (month1=201603);
5. Clear Ydb data for the corresponding partition
***************************************************
Select COUNT (r[0]) as CNT from
Ydb where Ysql (' From ', ' SELECT count (*) from Vehiclepass where ydbpartion=\ ' 201603\ ' and ydbkv=\ ' ydb.truncate:true\ ', ' Segment ')
***************************************************
http://hwits-master:9999/droppartion?tablename=vehiclepass&partions=201603
Http://hwits-master:9999/droppartion?tablename=vehiclepass&partions=201603&isforce=true

6. Import the data from the Hive table vhiclepass_hive into the Ydb table Vehiclepass
# # #insert into table vehiclepass select *, ' 201603 ', ' from vehiclepass_hive;
# # #将表的字段按列名对应写
INSERT into TABLE vehiclepass SELECT Rowkey,regionid,
Cityid,
Equipmentcode,
Directioncode,
Lanecode,
Inouttownflag,
Tollcode,
Communityid,
Tolltypecode,
Datacompanycode,
VEHICLEPLATESRC,
Speed
Time
Url
Year,
MONTH,
Day,
HOUR,
WEEK,
Dataclassify,
Vehicleplate,
Platecolor,
Vehiclecolor,
Brand
Childbrand,
Model
Species,
Manufacturer,
Headortail,
Brandyear,
Reliability,
Vehicletypecode,
Vehicleposition,
Firsttimeincity,
Firsttimeincounty,
Firsttimeincommunity,
Iword,
Ivehicleplate,
Analysisdata,
' 201603 ',
‘‘
From
vehiclepass_hive;
#######################################################################
ALTER TABLE vehiclepass_hive DROP IF EXISTS PARTITION (month1= ' 201603 ');
Confirm:
Select COUNT (*) from vehiclepass_hive;

#####################
Vehiclepass_hive

CREATE TABLE vehiclepass_hive
(
RowKey String,
RegionID String,
Cityid String,
Equipmentcode String,
Directioncode String,
Lanecode String,
Inouttownflag String,
Tollcode String,
Communityid String,
Tolltypecode String,
Datacompanycode String,
VEHICLEPLATESRC String,
Speed int,
Time String,
URL string,
Year string,
Month string,
Day String,
Hour string,
Week string,
Dataclassify String,
Vehicleplate String,
Platecolor String,
Vehiclecolor String,
Brand String,
Childbrand String,
Model String,
Species String,
Manufacturer String,
Headortail String,
Brandyear String,
reliability int,
Vehicletypecode String,
Vehicleposition String,
firsttimeincity int,
Firsttimeincounty int,
firsttimeincommunity int,
Iword String,
Ivehicleplate String,
Analysisdata string
)
Partitioned by (Month1 STRING)
ROW FORMAT delimited TERMINATED by ', ' STORED as textfile;

Ydb Data de-weight

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.