/* -- Note: Prepare Data (skip this step, which is time consuming) Create table CHECK1_T1 ( Id int, C1 CHAR (8000) ) Create table CHECK1_T2 ( Id int, C1 CHAR (8000) ) DECLARE @ I INT SET @ I = 1 While I <= 10000 BEGIN Insert into CHECK1_T1 SELECT @ I, 'C1' Insert into CHECK1_T2 SELECT 10000 + @ I, 'C1' SET @ I = @ I + 1 END Create table CHECK2_T1 ( Id int, C1 CHAR (8000) ) DECLARE @ I INT SET @ I = 1 While I <= 10000 BEGIN Insert into CHECK2_T1 SELECT @ I, 'C1' SET @ I = @ I + 1 END Insert into CHECK2_T1 VALUES (10001, 'C2 ') Insert into CHECK2_T1 VALUES (10002, 'C1 ') Create table CHECK3_T1 ( Id int, C1 CHAR (7000) ) Create table CHECK3_T2 ( Id int, C1 CHAR (7000) ) DECLARE @ I INT SET @ I = 1 While I <= 20000 BEGIN IF @ I % 2 = 0 BEGIN Insert into CHECK3_T1 SELECT @ I, 'C1' END ELSE BEGIN Insert into CHECK3_T1 SELECT @ I, 'C2' END IF @ I % 100 = 0 BEGIN Insert into CHECK3_T2 SELECT @ I, 'C1' Insert into CHECK3_T2 SELECT @ I + 50000, 'C2' END SET @ I = @ I + 1 END Create table CHECK4_T1 ( Id int, C1 CHAR (500 ), )
DECLARE @ I INT SET @ I = 1 While I <= 500000 BEGIN IF @ I % 100000 = 0 BEGIN Insert into CHECK4_T1 SELECT @ I, 'C2' END ELSE BEGIN Insert into CHECK4_T1 SELECT @ I, 'C1' END SET @ I = @ I + 1 END Create nonclustered index NCIX_C1 ON CHECK4_T1 (C1) Create table CHECK5_T1 ( Id int, C1 CHAR (10 ), ) DECLARE @ I INT SET @ I = 1 While I <= 10000 BEGIN Insert into CHECK5_T1 SELECT @ I, 'C1' IF @ I % 2 = 0 BEGIN Insert into CHECK5_T1 SELECT @ I, 'C1' END SET @ I = @ I + 1 END
*/ -- ============================================ -- 1. Union all replaces Union DBCC DROPCLEANBUFFERS DBCC FREEPROCCACHE
-- Test 1: (26 s) execution plan: table scan-> sort-> merge join Select id, C1 FROM CHECK1_T1 -- 1 W data records UNION Select id, C1 FROM CHECK1_T2 -- 1 W data records -- Test 2: (4S) execution plan: table scan-> table scan series Select id, C1 FROM CHECK1_T1 -- 1 W data records UNION ALL Select id, C1 FROM CHECK1_T2 -- 1 W data records -- Conclusion: it is quite time-consuming to sort and deduplicate the union statement in test center. If this function is not required, it is best to add ALL in big data. -- ============================================ -- 2. Exists instead of Count (*) DBCC DROPCLEANBUFFERS DBCC FREEPROCCACHE ---- Test 1: (7 s) execution plan: table scan-> stream aggregation-> computing vector DECLARE @ COUNT INT SELECT @ COUNT = COUNT (*) FROM CHECK2_T1 WHERE C1 = 'C1' -- 1 W data records IF @ COUNT> 0 BEGIN PRINT's' END ---- Test 2: (0 s) execution plan: constant scan/table scan-> nested loop-> calculate scalar If exists (SELECT 1 FROM CHECK2_T1 WHERE C1 = 'C1') -- 1 W data records BEGIN PRINT's' END -- Conclusion: use Exist to determine whether the table exists. It is not necessary to use COUNT (*) to COUNT all records of the table and scan it once. -- ============================================ -- 3. Replace IN (Select COL1 From Table) DBCC DROPCLEANBUFFERS DBCC FREEPROCCACHE -- Test 1: (3 s) execution plan: table scan-> hash matching Select id, C1 FROM CHECK3_T2 -- 400 rows Where id in (select id from CHECK3_T1 WHERE C1 = 'C1') -- 2W rows -- Test 2: (1 s) execution plan: table scan-> parallelism-> bitmap-> sort-> merge join-> parallelism Select a. ID, A. C1 FROM CHECK3_T2 Inner join CHECK3_T1 B on a. ID = B. id where B. C1 = 'C1' -- Test 3: (3 s) execution plan: table scan-> hash matching Select a. ID, A. C1 FROM CHECK3_T2 Where exists (SELECT 1 FROM CHECK3_T1 B where B. ID = A. id and B. C1 = 'C1 ') -- Conclusion: use inner join as much as possible. SQL SERVER optimizes the joined table during query. -- ============================================ -- 4. Not Exists instead of Not In -- Test 1: (8 s) execution plan: table scan-> nested loop-> hash matching Select id, C1 FROM CHECK3_T1 -- 2W rows Where id not in (select id from CHECK3_T2 WHERE C1 = 'C1') -- 400 rows -- Test 2: (4S) execution plan: table scan-> hash matching Select a. ID, A. C1 FROM CHECK3_T1 Where not exists (SELECT 1 FROM CHECK3_T2 B where B. ID = A. id and B. C1 = 'C1 ') -- Conclusion: Do NOT use not in as much as possible because nested loops are called. We recommend that you use not exists instead of not in. -- ============================================ -- 5. Avoid using any function in the condition column Drop table CHECK4_T1 Create nonclustered index NCIX_C1 ON CHECK4_T1 (C1) -- add a non-clustered INDEX --- Test 1 (4S) execution plan: index scan SELECT * FROM CHECK4_T1 where rtrim (C1) = 'C2' --- Test 2: (0 s) execution plan: index search SELECT * FROM CHECK4_T1 WHERE C1 = 'C2' -- Conclusion: using a function for the index field in the where condition will convert the index search into an index scan, greatly reducing the query efficiency. -- ============================================ -- 6. Execute dynamic SQL with sp_executesql DBCC DROPCLEANBUFFERS DBCC FREEPROCCACHE Create proc UP_CHECK5_T1 ( @ ID INT ) AS SET NOCOUNT ON DECLARE @ count INT, @ SQL NVARCHAR (4000) SET @ SQL = 'SELECT @ count = count (*) FROM CHECK5_T1 WHERE ID = @ ID' EXEC sp_executesql @ SQL, N'@ count int output, @ ID Int ', @ Count OUTPUT, @ ID PRINT @ count Create proc UP_CHECK5_T2 ( @ ID INT ) AS SET NOCOUNT ON DECLARE @ SQL NVARCHAR (4000) SET @ SQL = 'Clare @ count INT; SELECT @ count = count (*) FROM CHECK5_T1 WHERE ID = '+ CAST (@ ID AS VARCHAR (10) + '; PRINT @ count' EXEC (@ SQL) --- Test 1: instantaneous DECLARE @ N INT SET @ N = 1 WHILE @ N <= 1000 BEGIN EXEC UP_CHECK5_T1 @ N SET @ N = @ N + 1 END
--- Test 2: 2 s DECLARE @ N INT SET @ N = 1 WHILE @ N <= 1000 BEGIN EXEC UP_CHECK5_T2 @ N SET @ N = @ N + 1 END Create clustered index CIX_ID ON CHECK5_T1 (ID) DBCC DROPCLEANBUFFERS DBCC FREEPROCCACHE -- View Cache plan SELECT a. size_in_bytes 'Byte count ', Total_elapsed_time/execution_count 'average time ', Total_logical_reads/execution_count 'logical read ', Usecounts 'reuse times ', SUBSTRING (d. text, (statement_start_offset/2) + 1, (CASE statement_end_offset WHEN-1 then datalength (text) ELSE statement_end_offset END-statement_start_offset)/2) + 1) 'statement' FROM sys. dm_exec_cached_plans CROSS apply sys. dm_exec_query_plan (a. plan_handle) c, Sys. dm_exec_query_stats B CROSS apply sys. dm_exec_ SQL _text (B. SQL _handle) d WHERE a. plan_handle = B. plan_handle Order by total_elapsed_time/execution_count DESC; -- Conclusion: by executing the following cache plan, we can see that the first method completely uses the cache plan and the query has achieved good results; -- The second method wastes the cache plan and causes the cache to quickly become full. This method is rather undesirable. -- ============================================ -- 7. Substitution of Left Join -- Test an execution plan: table scan-> hash matching Select a. ID, A. C1 FROM CHECK3_T1 A -- 2W rows Left join CHECK3_T2 B on a. ID = B. id where B. C1 = 'C1' -- Row 400 -- Test 2 execution plan: table scan-> hash matching Select a. ID, A. C1 FROM CHECK3_T1 Right join CHECK3_T2 B on a. ID = B. ID WHERE a. C1 = 'C1' -- Test the three execution plans: table scan-> hash matching Select a. ID, A. C1 FROM CHECK3_T1 Inner join CHECK3_T2 B on a. ID = B. id where B. C1 = 'C1' -- Conclusion: the three statements are exactly the same in the execution plan. They all follow the inner join plan, -- In test 1 and Test 2, the WHERE statement contains fields in the LEFT and RIGHT Tables. If SQLSERVER finds that only fields in the table are available, it automatically performs processing based on inner join. -- Supplemental test: (1 s) execution plan: table scan-> parallelism-> bitmap-> sort-> merge join-> parallelism Select a. ID, A. C1 FROM CHECK3_T2 A -- Row 400 Inner join CHECK3_T1 B on a. ID = B. ID WHERE A. C1 = 'C1' -- 2W rows -- Conclusion: there is an interesting point here. If the data in the master table and the associated table are very different, the execution plan is another way. -- ============================================ -- 8, ON (a. id = B. id AND a. tag = 3) -- Test 1 Select a. ID, A. C1 FROM CHECK3_T1 Inner join CHECK3_T2 B on a. ID = B. ID AND A. C1 = 'C1' -- Test 2 Select a. ID, A. C1 FROM CHECK3_T1 Inner join CHECK3_T2 B on a. ID = B. ID WHERE A. C1 = 'C1' -- Conclusion: inner join: both the filtering conditions of the left and right tables can be placed in the WHERE clause. -- Test 1 Select a. ID, A. C1, B. C1 FROM CHECK3_T1 Left join CHECK3_T2 B on a. ID = B. id and B. C1 = 'C1' -- Test 2 Select a. ID, A. C1, B. C1 FROM CHECK3_T1 Left join CHECK3_T2 B on a. ID = B. id where B. C1 = 'C1' -- Conclusion: left outer join: when the filter conditions in the right table are placed in the ON clause, the results are different from those after the WHERE clause. -- ============================================ -- 9. Assign a value to the variable and add Top 1 -- Test 1: (3 s) execution plan: table scan DECLARE @ ID INT SELECT @ ID = id from CHECK1_T1 WHERE C1 = 'C1' SELECT @ ID -- Test 2: (0 s) execution plan: table scan-> first few rows DECLARE @ ID INT Select top 1 @ ID = id from CHECK1_T1 WHERE C1 = 'C1' SELECT @ ID -- Conclusion: it is best to add TOP 1 to assign values to variables to improve query efficiency. Second, for accuracy, if the table CHECK1_T1 has multiple values, the last record is assigned to @ ID. -- ============================================ -- 10. Consider whether CASE statements are suitable. DECLARE @ s int = 1 SELECT * FROM CHECK5_T1 WHERE C1 = (CASE @ s when 1 THEN C1 ELSE 'C2' END) SELECT * FROM CHECK5_T1 WHERE @ S = 1 OR C1 = 'C2' /* -- = Check whether the statement requires Distinct. Execution plan: table scan-> hash matching-> parallelism-> sorting Select distinct c1 from CHECK3_T1 , Disable Select *, specify a specific column name Select c1 from CHECK4_T1 Select * from CHECK4_T1 , Insert into Table (*), specify the specific column name And Isnull. Do not use isnull for the field if it is not necessary, and the index cannot be effectively used will also occur, This works the same way as avoiding using functions in filter columns. Nested subqueries with query conditions to minimize the result set of subqueries -- ===================================================== */
|