1. Create Ydb Export Table Vehiclepass_txt_export
Create External Table Vehiclepass_txt_export (
RowKey string, RegionID string, Cityid string, Equipmentcode string, Directioncode string,lanecode string,inouttownflag s Tring,tollcode String,communityid string,tolltypecode string,datacompanycode string,vehicleplatesrc string,speed int , Time String,url string,year string,month string,day string,hour string,week string,dataclassify string,vehiclePlate String,platecolor string,vehiclecolor string,brand string,childbrand string,model string,species string,manufacturer String,headortail string,brandyear string,reliability int,vehicletypecode string,vehicleposition String, Firsttimeincity int,firsttimeincounty int,firsttimeincommunity Int,iword string,ivehicleplate string,analysisData String
) Partitioned by (' Month1 ' string)
Location '/data/mr/vehiclepass_txt_export ';
2. Export the data from the YDB table Vehiclepass partition 201603 to the external table vehiclepass_export_txt
Insert Overwrite table Vehiclepass_txt_export partition (month1= ' 201604 ')
Select
R[0] as RowKey,
R[1] as RegionID,
R[2] as Cityid,
R[3] as Equipmentcode,
R[4] as Directioncode,
R[5] as Lanecode,
R[6] as Inouttownflag,
R[7] as Tollcode,
R[8] as Communityid,
R[9] as Tolltypecode,
R[10] as Datacompanycode,
R[11] as VEHICLEPLATESRC,
R[12] as speed,
R[13] As time,
R[14] as URL,
R[15] as year,
R[16] as month,
R[17] as day,
R[18] as hour,
R[19] as week,
R[20] as Dataclassify,
R[21] as Vehicleplate,
R[22] as Platecolor,
R[23] as Vehiclecolor,
R[24] as Brand,
R[25] as Childbrand,
R[26] as model,
r[27] as species,
R[28] as manufacturer,
R[29] as Headortail,
R[30] as Brandyear,
R[31] as reliability,
R[32] as Vehicletypecode,
R[33] as Vehicleposition,
R[34] as firsttimeincity,
R[35] as Firsttimeincounty,
R[36] as firsttimeincommunity,
R[37] as Iword,
R[38] as Ivehicleplate,
R[39] as Analysisdata
From Ydb where Ysql (' From ', ' select Rowkey,regionid,cityid,equipmentcode,directioncode,lanecode,inouttownflag, Tollcode,communityid,tolltypecode,datacompanycode,vehicleplatesrc,speed,time,url,year,month,day,hour,week, Dataclassify,vehicleplate,platecolor,vehiclecolor,brand,childbrand,model,species,manufacturer,headortail, Brandyear,reliability,vehicletypecode,vehicleposition,firsttimeincity,firsttimeincounty,firsttimeincommunity, Iword,ivehicleplate,analysisdata from vehiclepass where ydbpartion = \ ' 201604\ ', ' segment ');
# # #删除导出表指定分区数据ALTER TABLE vehiclepass_txt_export DROP IF EXISTS PARTITION (month1= ' 201603 ');
3. Save the data in table Vehiclepass_export_txt 201603 to HDFs
/hadoop/bin/hadoop jar Dereplicationyunnan.jar hdfs://10.166.112.190:9000/data/mr/vehiclepass_txt_export/month1= 201603 hdfs://10.166.112.190:9000/data/mr/vehiclepass/201603
To back up the data after the deduplication
Hadoop fs-cp-f/data/mr/vehiclepass/201603/data/mr/vehiclepass_bak
4. Import the deduplication data into the Hive table vehiclepass_hive (and automatically remove the/data/mr/vehiclepass/201603/part-r-00000)
Load data inpath '/data/mr/vehiclepass/201603/part-r-00000 ' into table vehiclepass_hive partition (month1=201603);
5. Clear Ydb data for the corresponding partition
***************************************************
Select COUNT (r[0]) as CNT from
Ydb where Ysql (' From ', ' SELECT count (*) from Vehiclepass where ydbpartion=\ ' 201603\ ' and ydbkv=\ ' ydb.truncate:true\ ', ' Segment ')
***************************************************
http://hwits-master:9999/droppartion?tablename=vehiclepass&partions=201603
Http://hwits-master:9999/droppartion?tablename=vehiclepass&partions=201603&isforce=true
6. Import the data from the Hive table vhiclepass_hive into the Ydb table Vehiclepass
# # #insert into table vehiclepass select *, ' 201603 ', ' from vehiclepass_hive;
# # #将表的字段按列名对应写
INSERT into TABLE vehiclepass SELECT Rowkey,regionid,
Cityid,
Equipmentcode,
Directioncode,
Lanecode,
Inouttownflag,
Tollcode,
Communityid,
Tolltypecode,
Datacompanycode,
VEHICLEPLATESRC,
Speed
Time
Url
Year,
MONTH,
Day,
HOUR,
WEEK,
Dataclassify,
Vehicleplate,
Platecolor,
Vehiclecolor,
Brand
Childbrand,
Model
Species,
Manufacturer,
Headortail,
Brandyear,
Reliability,
Vehicletypecode,
Vehicleposition,
Firsttimeincity,
Firsttimeincounty,
Firsttimeincommunity,
Iword,
Ivehicleplate,
Analysisdata,
' 201603 ',
‘‘
From
vehiclepass_hive;
#######################################################################
ALTER TABLE vehiclepass_hive DROP IF EXISTS PARTITION (month1= ' 201603 ');
Confirm:
Select COUNT (*) from vehiclepass_hive;
#####################
Vehiclepass_hive
CREATE TABLE vehiclepass_hive
(
RowKey String,
RegionID String,
Cityid String,
Equipmentcode String,
Directioncode String,
Lanecode String,
Inouttownflag String,
Tollcode String,
Communityid String,
Tolltypecode String,
Datacompanycode String,
VEHICLEPLATESRC String,
Speed int,
Time String,
URL string,
Year string,
Month string,
Day String,
Hour string,
Week string,
Dataclassify String,
Vehicleplate String,
Platecolor String,
Vehiclecolor String,
Brand String,
Childbrand String,
Model String,
Species String,
Manufacturer String,
Headortail String,
Brandyear String,
reliability int,
Vehicletypecode String,
Vehicleposition String,
firsttimeincity int,
Firsttimeincounty int,
firsttimeincommunity int,
Iword String,
Ivehicleplate String,
Analysisdata string
)
Partitioned by (Month1 STRING)
ROW FORMAT delimited TERMINATED by ', ' STORED as textfile;
Ydb Data de-weight