Drop table test;
Select count (*) from test;
-- Create a test table
Create Table Test
(
ID number (9 ),
Nick varchar2 (30)
);
-- Insert Test Data
Begin
For I in 1 .. 100000 Loop
Insert into test (ID) values (I );
End loop;
Commit;
End;
Select * from test;
-- Update the Nick field to cause severe data skew.
Update Test Set Nick = 'abc' where rownum <99999;
-- Create an index
Create index idx_test_nick on test (Nick );
Update Test Set Nick = 'def 'where Nick is null;
-- Only analyze Indexes
Analyze index idx_test_nick compute statistics;
Select * From user_indexes;
-- View the index name, corresponding to the stored data block, the number of keys, and the number of records (number of rows ).
Select index_name, leaf_blocks, distinct_keys, num_rows
From user_indexes
Where index_name = 'idx _ test_nick ';
-- Dba_tab_col_statistics
-- View the statistical information of a table
Select column_name, num_buckets, num_distinct
From user_tab_columns
Where table_name = 'test ';
Select * from test where Nick = 'abc ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose
1 0 Table Access (by index rowid) of 'test'
2 1 index (range scan) of 'idx _ test_nick '(NON-UNIQUE)
Select * from test where Nick = 'def ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose
1 0 Table Access (by index rowid) of 'test'
2 1 index (range scan) of 'idx _ test_nick '(NON-UNIQUE)
-- We can see from the above that after the index analysis, the SQL Execution path is based on rules, and the index field offset
-- First find the rowid Based on the index and then read the record based on the rowid. This process must be slower than reading the record during full table scan.
-- User_part_col_statistics partition analysis information
-- Nick in the second column of the analysis table
Analyze table test compute statistics for columns size 2 Nick;
Select * from test where Nick = 'abc ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose
1 0 Table Access (by index rowid) of 'test'
2 1 index (range scan) of 'idx _ test_nick '(NON-UNIQUE)
-- Based on the above Execution Plan, it is still executed according to the rules.
-- Analysis table
Analyze table test compute statistics for table;
Select * from test where Nick = 'abc ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 49 card = 99998 bytes =
1499970)
1 0 Table Access (full) of 'test' (cost = 49 card = 99998 bytes = 14
99970)
-- After the analysis table, the statement is executed based on the cost.
-- Delete all statistics and analyze only tables and columns without indexing,
-- Oracle uses the CBO optimizer and generates a correct execution plan
Analyze table test Delete statistics;
-- Analyze column Nick
Analyze table test compute statistics for table for columns size 2 Nick;
Select * from test where Nick = 'abc ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 49 card = 99998 bytes =
1499970)
1 0 Table Access (full) of 'test' (cost = 49 card = 99998 bytes = 14
99970)
--
Select * from test where Nick = 'def ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 2 card = 2 bytes = 30)
1 0 Table Access (by index rowid) of 'test' (cost = 2 card = 2 BYT
Es = 30)
2 1 index (range scan) of 'idx _ test_nick '(NON-UNIQUE) (Cost
= 1 card = 2)
-- Create an index on the ID column of the test table, but do not analyze the index
Create index idx_test_id on test (ID );
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 2 card = 1000 bytes = 15
000)
1 0 Table Access (by index rowid) of 'test' (cost = 2 card = 1000
Bytes = 15000)
2 1 index (range scan) of 'idx _ test_id '(NON-UNIQUE) (cost = 1
Card = 400)
-- When the condition contains both ID and Nick, because Nick has a histogram, Oracle knows that Nick = 'abc' has many values, so it does not take the idx_test_nick index, take the index on idx_test_id
Select * from test where id = 5 and Nick = 'abc ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 2 card = 1000 bytes = 15
000)
1 0 Table Access (by index rowid) of 'test' (cost = 2 card = 1000
Bytes = 15000)
2 1 index (range scan) of 'idx _ test_id '(NON-UNIQUE) (cost = 1
Card = 400)
-- When the condition contains both ID and Nick, because Nick has a histogram, Oracle knows that Nick = 'def 'has very few values, so the index on idx_test_nick is used, do not go through the idx_test_id Index
Select * from test where id = 5 and Nick = 'def ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 2 card = 1 bytes = 15)
1 0 Table Access (by index rowid) of 'test' (cost = 2 card = 1 BYT
Es = 15)
2 1 index (range scan) of 'idx _ test_nick '(NON-UNIQUE) (Cost
= 1 card = 2)
Select * from test where Nick = 'def 'and ID = 5;
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 2 card = 1 bytes = 15)
1 0 Table Access (by index rowid) of 'test' (cost = 2 card = 1 BYT
Es = 15)
2 1 index (range scan) of 'idx _ test_nick '(NON-UNIQUE) (Cost
= 1 card = 2)
-- After the ID column is analyzed, Oracle finds that the ID column has a higher selection degree, so idx_test_nick index is not selected, but idx_test_id.
Analyze table test compute statistics for columns size 1 ID;
Select * from test where id = 5 and Nick = 'def ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 2 card = 1 bytes = 7)
1 0 Table Access (by index rowid) of 'test' (cost = 2 card = 1 BYT
Es = 7)
2 1 index (range scan) of 'idx _ test_id '(NON-UNIQUE) (cost = 1
Card = 1)
/*
Next, let's look at another situation where we delete all the statistics and create a unique index on the ID column. Under this condition,
Only analyze the table and analyze the column Nick. We can see that Oracle adopts the correct execution plan,
Taking uk_test_id, it actually brings us a lot of inspiration:
Do you need a histogram for columns with primary key and unique key constraints?
If the query where ID> 100 and ID <1000,
We still need a histogram, but it seems that there is no need for a histogram!
*/
Analyze table test Delete statistics;
Drop index idx_test_id;
Create unique index uk_test_id on test (ID );
-- Nick in the second column of the analysis table
Analyze table test compute statistics for table for columns size 2 Nick;
Select * from test where id = 5 and Nick = 'def ';
Execution Plan
----------------------------------------------------------
0 SELECT statement optimizer = choose (cost = 2 card = 1 bytes = 15)
1 0 Table Access (by index rowid) of 'test' (cost = 2 card = 1 BYT
Es = 15)
2 1 index (unique scan) of 'uk _ test_id '(unique) (cost = 1 car
D = 100000)
From the above series of experiments, we can see that for the Oracle optimizer CBO, Table Analysis and column analysis are the most important, and index analysis is the second. In addition, we can consider which columns require histograms. For the number of buckets, the default Oracle value is 75, so according to your application rules, selecting the appropriate number of buckets is also helpful for performance. The increase in the number of unnecessary buckets will inevitably lead to the complexity of execution plans when SQL statements are hard parsed.
Full calculation method: Analyze table ABC compute statistics;
Sample estimation method (sample 20%): Analyze table ABC estimate statistics sample 20 percent
The full Calculation of a table takes the same time as a full table scan. The sampling estimation method uses sampling to generate statistics faster than the full calculation method. If accurate data is not required, sampling analysis is recommended. We recommend that you use sample Estimation for table analysis and use full Calculation for index analysis.
We can use the following methods to regularly analyze and generate statistical information for database tables and indexes and cluster tables to ensure the normal performance of applications.
Generate Index Analysis and SQL statement for table analysis: