The following code has been tested and can be run directly.
A brief description of Oracle analysis functions, multidimensional functions, and Model functions, mainly for BI report statistics, not necessarily comprehensive, but a few instructions on BI application scenarios
-- Create a sales quantity table with an increasing data Trend
Create table ComputerSales
SELECT
120 + TRUNC (rn/12) + ROUND (DBMS_RANDOM.VALUE () SalesNumber
FROM
(
SELECT level, ROWNUM rn
FROM DUAL
Connect by rownum <= 120
);
-- The following is used to compare the statistics of NULL values and non-NULL values. It can be seen that there is a problem with COUNT in the case of NULL values. Therefore, it is recommended that you do not use a NULL value column in the database system.
SELECT
COUNT (*),
COUNT (a. SalesNumber ),
COUNT (DISTINCT a. SalesNumber ),
SUM (a. SalesNumber ),
AVG (a. SalesNumber ),
MAX (a. SalesNumber ),
MIN (a. SalesNumber)
FROM ComputerSales;
Delete from ComputerSales WHERE SalesNumber is null;
COMMIT;
Insert into ComputerSales VALUES (NULL );
COMMIT;
Insert into ComputerSales VALUES (NULL );
COMMIT;
SELECT
COUNT (*),
COUNT (a. SalesNumber ),
COUNT (DISTINCT a. SalesNumber ),
SUM (a. SalesNumber ),
AVG (a. SalesNumber ),
MAX (a. SalesNumber ),
MIN (a. SalesNumber)
FROM ComputerSales;
SELECT trunc (dbms_random.value (1,101 )),
Delete from ComputerSales WHERE SalesNumber is null;
COMMIT;
-- Create a table with the date field added
Create table ComputerSalesBAK
SELECT SalesNumber, TRUNC (SYSDATE) + MOD (a.DateSEQ-1, 10) SalesDate
FROM (SELECT SalesNumber, ROW_NUMBER () OVER (order by rowid) DateSEQ FROM ComputerSales);
Drop table ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
-- The following two creation methods are used to create Area columns and date columns.
Create table ComputerSalesBAK
SELECT SalesNumber, TRUNC (SYSDATE) + MOD (A. DateSEQ-1, 24) SalesDate,
Case when trunc (DateSEQ-1)/24) = 1 THEN South China
When trunc (DateSEQ-1)/24) = 2 THEN North China
When trunc (DateSEQ-1)/24) = 3 THEN Northeast Region
When trunc (DateSEQ-1)/24) = 4 THEN East China
Other ELSE regions
END
FROM (SELECT SalesNumber, ROW_NUMBER () OVER (order by rowid) DateSEQ FROM ComputerSales);
Drop table ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
-- In this example, duplicate data of SalesDate and Area can be constructed.
Create table ComputerSalesBAK
SELECT SalesNumber,
TRUNC (SYSDATE) + MOD (A. DateSEQ-1, 10) SalesDate,
Case when AreaSEQ = 1 THEN South China
WHEN AreaSEQ = 2 THEN North China
WHEN AreaSEQ = 3 THEN Northeast Region
WHEN AreaSEQ = 4 THEN East China
Other ELSE regions
END
FROM (SELECT SalesNumber, ROW_NUMBER () OVER (order by rowid) DateSEQ, ROUND (dbms_random.VALUE (1, 5) AreaSEQ FROM ComputerSales);
Drop table ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
-- Moving the average value, accumulative sum, average value of the current window, sum of the current window, and scope of the window function and sorting Function
SELECT
Area, SalesDate, SalesNumber,
MIN (SalesNumber) OVER (partition by Area order by SalesDate) AS min_Area_SalesDate,
MAX (SalesNumber) OVER (partition by Area order by SalesDate) AS max_Area_SalesDate,
AVG (SalesNumber) OVER (partition by Area order by SalesDate) AS avg_Area_SalesDate,
SUM (SalesNumber) OVER (partition by Area order by SalesDate) AS sum_Area_SalesDate,
COUNT (*) OVER (partition by Area order by SalesDate) AS count_Area,
MIN (SalesNumber) OVER (partition by Area) AS min_Area,
MAX (SalesNumber) OVER (partition by Area) AS max_Area,
AVG (SalesNumber) OVER (partition by Area) AS avg_Area,
SUM (SalesNumber) OVER (partition by Area) AS sum_Area,
COUNT (*) OVER (partition by Area) AS count_Area
FROM ComputerSales
-- Observe the differences between Rank, Dense_Rank, Row_number, and Count.
-- Rank: Skip number. Dense_Rank is not skip. Row_number is unique. Count also jumps based on statistics.
-- If the partition by and order by fields are unique, there is no difference between the four functions.
SELECT
Area, SalesDate, SalesNumber,
RANK () OVER (partition by Area order by SalesNumber) AS Rank_Area_SalesNumber,
DENSE_RANK () OVER (partition by Area order by SalesNumber) AS DenseRank_Area_SalesNumber,
ROW_NUMBER () OVER (partition by Area order by SalesNumber) AS Rownumber_Area_SalesNumber,
COUNT (*) OVER (partition by Area order by SalesNumber) AS CountAll_Area_SalesNumber,
COUNT (SalesNumber) OVER (partition by Area order by SalesNumber) AS Count_Area_SalesNumber
FROM ComputerSales
-- Observe the similarities and differences between Lag and Lead and the Lag Parameters
-- By default, Lag takes the value of the first row, and Lead takes the value of the last row.
-- The first parameter of Lag and lead determines the location of the row. The second parameter is the default value when the value is not obtained.
SELECT
Area, SalesDate, SalesNumber,
LAG (SalesNumber) OVER (partition by Area order by SalesDate) AS Lag_Area_SalesNumber,
LEAD (SalesNumber) OVER (partition by Area order by SalesDate) AS Lead_Area_SalesNumber,
LAG (SalesNumber, 1) OVER (partition by Area order by SalesDate) AS Lag1_Area_SalesNumber,
LAG (SalesNumber, 2) OVER (partition by Area order by SalesDate) AS Lag2_Area_SalesNumber,
LEAD (SalesNumber, 1) OVER (partition by Area order by SalesDate) AS lead+area_salesnumber,
LEAD (SalesNumber, 2) OVER (partition by Area order by SalesDate) AS Lead2_Area_SalesNumber,
LAG (SalesNumber, 1, 0) OVER (partition by Area order by SalesDate) AS Lag10_Area_SalesNumber,
LAG (SalesNumber, 2, 1) OVER (partition by Area order by SalesDate) AS Lag21_Area_SalesNumber,
LEAD (SalesNumber, 1, 0) OVER (partition by Area order by SalesDate) AS Lead10_Area_SalesNumber,
LEAD (SalesNumber, 2, 1) OVER (partition by Area order by SalesDate) AS Lead21_Area_SalesNumber
FROM ComputerSales
-- Observe the differences between First_Value and Last_Value.
-- If a column corresponding to the maximum and minimum values in the same group is selected, use FIRST_VALUE to sort the columns in ascending or descending order.
-- LAST_VALUE is like the last row requested by the two groups.
SELECT
Area, SalesDate, SalesNumber,
FIRST_VALUE (SalesDate) OVER (partition by Area order by SalesNumber) AS FirstValue_Area,
FIRST_VALUE (SalesDate) OVER (partition by Area order by SalesNumber DESC) AS FirstValue_Area_Desc,
LAST_VALUE (SalesDate) OVER (partition by Area order by SalesNumber) AS LastValue_Area,
LAST_VALUE (SalesDate) OVER (partition by Area order by SalesNumber DESC) AS LastValue_Area_Desc
FROM ComputerSales
-- Unlike the above, KEEP needs to be used with DENSE_RANK FIRST | DENSE_RANK LAST, and obtains the maximum or minimum value obtained by sorting by SalesNumber in the same Area, the above is just the first line or the last line.
SELECT Area, SalesDate, SalesNumber,
DENSE_RANK () OVER (partition by Area order by SalesNumber) DENSE_RANK,
MIN (SalesDate) KEEP (DENSE_RANK first order by SalesNumber) OVER (partition by Area) min_first,
MIN (SalesDate) KEEP (DENSE_RANK last order by SalesNumber) OVER (partition by Area) min_last,
MAX (SalesDate) KEEP (DENSE_RANK first order by SalesNumber) OVER (partition by Area) max_first,
MAX (SalesDate) KEEP (DENSE_RANK last order by SalesNumber) OVER (partition by Area) max_last
FROM Co