1. Test data
Table Structure One:
CREATE TABLE Fact_data
(
day_id Integer,
popdb_id Integer,
region_id Integer,
area_id Integer,
city_id Integer,
product_id Integer,
F1 Integer,
F2 integer,
F3 Integer,
F4 Integer,
F5 Integer,
F6 Integer,
F7 Integer,
F8 integer,
user_id integer
)
Table Structure II
CREATE TABLE Fact_data_hstore
(
day_id Integer,
popdb_id Integer,
region_id Integer,
area_id Integer,
city_id Integer,
product_id Integer,
user_id Integer,
Data Hstore
)
2. Test Report under Windows
2 CPUs, 4 cores per CPU, 3GHZ
Disk EVA 4400 disks
Postgresql 9.3
2.1 Test results (one field 4 million records)
sqlserver:4060579 Records
Select region_id, AVG (f1*1.0) from Fact_data GROUP by region_id
Result set: Returns 9 Records, 2 fields, and takes five milliseconds
postgresql:4060579 Records
Select region_id, AVG (F1) from Fact_data GROUP by region_id
Result set: Returns 9 Records, 2 fields, time 1310 milliseconds
postgresql:4060579 Records not indexed
Select region_id, Avg ((data-> ' F1 '):: integer) from Fact_data_hstore
GROUP BY region_id
Result set: Returns 9 Records, 2 fields, time 3051 milliseconds
postgresql:4060579 Records Add index
Select region_id, Avg ((data-> ' F1 '):: integer) from Fact_data_hstore
GROUP BY region_id
Result set: Returns 9 Records, 2 fields, time-consuming milliseconds
Indexes have no effect on the performance of this SQL statement.
2.2 Test results (eight fields 4 million records)
sqlserver:4060579 Records
Select Region_id,avg (F1) as AF1
From Fact_data
GROUP BY region_id
Result set: Returns 9 Records, 2 fields, time 1316 milliseconds
postgresql:4060579 Records
Select Region_id,avg (F1) as AF1, AVG (F2) as af2, Avg (F3) as AF3, AVG (f4) as AF4, Avg (F5) as AF5, Avg (f6) as AF6, avg (F7) A S AF7, AVG (F8) as Af8
From Fact_data
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 2850 milliseconds
postgresql:4060579 Records not indexed
Select region_id, Avg ((data-> ' F1 '):: integer) as AF1, avg ((data-> ' F2 '):: integer) as af2, avg ((Data-> ' F3 '):: Integer) as AF3, avg ((data-> ' F4 '):: integer) as AF4,
AVG ((data-> ' F5 '):: integer) as AF5, avg ((data-> ' F6 '):: integer) as AF6, avg ((data-> ' F7 '):: integer) as AF7, AVG (( Data-> ' F8 '):: integer) as Af8
From Fact_data_hstore
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 12690 milliseconds
2.3 Test Results (eight fields 1 million records)
postgresql:1000000 Records
Select Region_id,avg (F1) as AF1, AVG (F2) as af2, Avg (F3) as AF3, AVG (f4) as AF4, Avg (F5) as AF5, Avg (f6) as AF6, avg (F7) A S AF7, AVG (F8) as Af8
From fact_data_1m
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 720 milliseconds
postgresql:1000000 Records not indexed
Select region_id, Avg ((data-> ' F1 '):: integer) as AF1, avg ((data-> ' F2 '):: integer) as af2, avg ((Data-> ' F3 '):: Integer) as AF3, avg ((data-> ' F4 '):: integer) as AF4,
AVG ((data-> ' F5 '):: integer) as AF5, avg ((data-> ' F6 '):: integer) as AF6, avg ((data-> ' F7 '):: integer) as AF7, AVG (( Data-> ' F8 '):: integer) as Af8
From fact_data_hstore_1m
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 3109 milliseconds
3. Test report under Linux virtual machine
Virtual Machine 2 cores, ordinary hard disk
3.1 Test results (one field 4 million records)
postgresql:4060579 Records
Select region_id, AVG (F1) from Fact_data GROUP by region_id
Result set: Returns 9 Records, 2 fields, time 860 milliseconds
postgresql:4060579 Records not indexed
Select region_id, Avg ((data-> ' F1 '):: integer) from Fact_data_hstore
GROUP BY region_id
Result set: Returns 9 Records, 2 fields, time 1630 milliseconds
3.2 Test Results (eight fields 4 million records)
postgresql:4060579 Records
Select Region_id,avg (F1) as AF1, AVG (F2) as af2, Avg (F3) as AF3, AVG (f4) as AF4, Avg (F5) as AF5, Avg (f6) as AF6, avg (F7) A S AF7, AVG (F8) as Af8
From Fact_data
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 1708 milliseconds
postgresql:4060579 Records not indexed
Select region_id, Avg ((data-> ' F1 '):: integer) as AF1, avg ((data-> ' F2 '):: integer) as af2, avg ((Data-> ' F3 '):: Integer) as AF3, avg ((data-> ' F4 '):: integer) as AF4,
AVG ((data-> ' F5 '):: integer) as AF5, avg ((data-> ' F6 '):: integer) as AF6, avg ((data-> ' F7 '):: integer) as AF7, AVG (( Data-> ' F8 '):: integer) as Af8
From Fact_data_hstore
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 6771 milliseconds
3.3 Test Results (eight fields 1 million records)
postgresql:1000000 Records
Select Region_id,avg (F1) as AF1, AVG (F2) as af2, Avg (F3) as AF3, AVG (f4) as AF4, Avg (F5) as AF5, Avg (f6) as AF6, avg (F7) A S AF7, AVG (F8) as Af8
From fact_data_1m
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 470 milliseconds
postgresql:1000000 Records not indexed
Select region_id, Avg ((data-> ' F1 '):: integer) as AF1, avg ((data-> ' F2 '):: integer) as af2, avg ((Data-> ' F3 '):: Integer) as AF3, avg ((data-> ' F4 '):: integer) as AF4,
AVG ((data-> ' F5 '):: integer) as AF5, avg ((data-> ' F6 '):: integer) as AF6, avg ((data-> ' F7 '):: integer) as AF7, AVG (( Data-> ' F8 '):: integer) as Af8
From fact_data_hstore_1m
GROUP BY region_id
Result set: Returns 9 Records, 9 fields, time 1700 milliseconds
4. LU Table Split test
Add lu_popdb
CREATE TABLE Lu_pop
(
popdb_id Integer,
region_id Integer,
area_id Integer,
city_id integer
)
With (
Oids=false
);
ALTER TABLE Lu_pop
OWNER to Postgres;
INSERT INTO Lu_pop
Select distinct popdb_id,region_id,area_id,city_id from Fact_data
Postgresql:
Select p.region_id, AVG (F1) as AF1, AVG (F2) as af2, Avg (F3) as AF3, AVG (f4) as AF4, Avg (F5) as AF5, Avg (f6) as AF6, Avg F7 As AF7, AVG (F8) as Af8 from
Fact_data F
INNER JOIN Lu_pop p on f.popdb_id = p.popdb_id
GROUP BY p.region_id
Result set:
4 million return 9 Records, 9 fields, time 3201 milliseconds
1 million return 9 Records, 9 fields, time 820 milliseconds
Postgresql:
Select p.region_id, Avg ((data-> ' F1 '):: integer) as AF1, avg ((data-> ' F2 '):: integer) as af2, avg ((Data-> ' F3 '):: Integer) as AF3, avg ((data-> ' F4 '):: integer) as AF4,
AVG ((data-> ' F5 '):: integer) as AF5, avg ((data-> ' F6 '):: integer) as AF6, avg ((data-> ' F7 '):: integer) as AF7, AVG (( Data-> ' F8 '):: integer) as Af8
From Fact_data_hstore F
INNER JOIN Lu_pop p on f.popdb_id = p.popdb_id
GROUP BY p.region_id
Result set:
4 million return 9 Records, 9 fields, time 9101 milliseconds
1 million return 9 Records, 9 fields, time 2310 milliseconds
Add Lu_popdb_hstore
CREATE TABLE Lu_pop_hstore
(
popdb_id Integer,
Hierarchies Hstore
)
With (
Oids=false
);
ALTER TABLE Lu_pop_hstore
OWNER to Postgres;
INSERT INTO Lu_pop_hstore
Select Popdb_id,hstore (' region_id=> ' | | region_id| | ', ' | | ' Area_id=> ' | | area_id| | ', ' | | ' City_id=> ' | | city_id) from Lu_pop;
Select P.hierarchies-> ' region_id ', AVG (F1) as AF1, AVG (F2) as af2, Avg (F3) as AF3, AVG (f4) as AF4, Avg (F5) as AF5, AVG ( F6) as AF6, avg (F7) as AF7, Avg (F8) as Af8 from
Fact_data F
INNER JOIN Lu_pop_hstore p on f.popdb_id = p.popdb_id
GROUP by p.hierarchies-> ' region_id '
Result set:
4 million data returned 9 records, 9 fields, time 3760 milliseconds
1 million data returned 9 records, 9 fields, time 990 milliseconds
Select P.hierarchies-> ' region_id ', avg ((data-> ' F1 '):: integer) as AF1, avg ((data-> ' F2 '):: integer) as af2, AVG ( (Data-> ' F3 '):: integer) as AF3, avg ((data-> ' F4 '):: integer) as AF4,
AVG ((data-> ' F5 '):: integer) as AF5, avg ((data-> ' F6 '):: integer) as AF6, avg ((data-> ' F7 '):: integer) as AF7, AVG (( Data-> ' F8 '):: integer) as Af8
From Fact_data_hstore F
INNER JOIN Lu_pop_hstore p on f.popdb_id = p.popdb_id
GROUP by p.hierarchies-> ' region_id '
Result set:
4 million data returned 9 records, 9 fields, time 9852 milliseconds
1 million data returned 9 records, 9 fields, time 2500 milliseconds
5. Test conclusion
|
Regular Columns Fact (4 million) |
Hstore Fact (4 million) |
Regular Columns Fact (1 million) |
Hstore Fact (1 million) |
Full access to Fact sheet windows |
2,850 |
12,690 |
720 |
3,109 |
Full Entry Fact Sheet |
1,708 |
6,771 |
470 |
1,700 |
Split LU |
3,200 |
9,101 |
820 |
2,310 |
Split LU to be Hstore format |
3,760 |
9,852 |
990 |
2,500 |
To Store fact Data VS Hstore Storing factual data
The fact data stored in columns is 150% to 300% times higher than the actual data stored by Hstore. Thus, the performance of the data stored with Hstore is poor.
Independent Lu Table vs put Lu put all into the fact table
Split LU table and put Lu all into the fact table, all into the fact table than split into LU table performance of high 30% to 80% times, the LU split out the performance is poor.
Independent Lu Table vs Independent Lu Hstore Table
It is 8% to 20% higher performance to store LU data in columns than in Hstore format. Thus, it is acceptable to use Hstore to store LU data with reduced performance.
data Volume 4 million VS 1 million
The response time is almost proportional to the amount of data, so be sure to partition the table so that the amount of data is controlled within acceptable ranges.
Windows VS Linux Virtual Machines
Even though the hardware of Windows is powerful, the performance is 50% to 80% worse than the Linux virtual machine.
Postgresql Report Performance Test