-- Environment: 64-bit 11.2G -- 1. CREATE a test TABLE and generate 20 million test data. 2 million of the test data is duplicated in create table test_t (id NUMBER (8) not null primary key, name VARCHAR2 (32); begin for I IN 1 .. 18000000 loop insert into test_t (id, name) VALUES (I, SYS_GUID (); END LOOP; COMMIT; END;/INSERT INTO test_t (id, name) SELECT 18000000 + (id/9), name FROM test_t where mod (id, 9) = 0; COMMIT; create index idx_test_anme ON test_t (name); analyze table test_t Compute statistics for table for all indexes for all indexed columns; -- 2. Comparison between the two de-duplication schemes; solution 1: create an intermediate TABLE, generate a secondary object, and delete the original TABLE, change the TABLE name to create table tempAS SELECT * FROM test_t a WHERE. ROWID = (select min (B. ROWID) FROM test_t B WHERE. name = B. name); -- Time: 08: 14.79 create index idx ON temp (name); -- Time: 02: 45.73 alter table temp add constraint temp_pk primary key (id); -- Time: 01: 15.79 -- total time: 12 minutes and 16 seconds -- solution 2: delete the duplicate data in the original table. DELE TE test_t a WHERE. ROWID> (select min (B. ROWID) FROM test_t B WHERE. name = B. name); -- time used: 2 hours out, interrupted to exit.