CDC implements data synchronization and incremental updates
In SQL Server 2008, CDC (Change Data Capture) can be used to track data changes. Compared with the earlier timestamp- and trigger-based approaches, it is much more convenient.
References:
http://www.mssqltips.com/tip.asp?tip=1755
http://blog.csdn.net/ldslove/archive/2010/05/20/5612248.aspx
The implementation steps are as follows:
1. Configure CDC
-- Enable CDC at the database level
Use db1
Go
Exec sys.sp_cdc_enable_db
-- Verify
-- 0: CDC disabled; 1: CDC enabled
Select is_cdc_enabled from sys.databases where database_id = db_id()
-- Enable CDC on the table
Use db1;
Go
Exec sys.sp_cdc_enable_table
  @source_schema = 'dbo'
, @source_name = 't_cdc_ta'
, @role_name = NULL
, @capture_instance = NULL
, @supports_net_changes = 1
, @index_name = NULL
, @captured_column_list = NULL
, @filegroup_name = default
, @allow_partition_switch = 1
/*
After CDC is enabled, the cdc schema is created, along with the query functions and the change data tables:
cdc.captured_columns
cdc.change_tables
cdc.ddl_history
cdc.index_columns
cdc.lsn_time_mapping
dbo.systranschemas
cdc.dbo_t_cdc_ta_ct
*/
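To confirm that the table is being tracked and which columns are captured, the built-in helper procedure can be run; this quick check is not part of the original script, just a verification step:
-- List the capture instance and captured columns for the source table
Exec sys.sp_cdc_help_change_data_capture
  @source_schema = 'dbo'
, @source_name = 't_cdc_ta'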
-- Table structure (the source table must already exist when sys.sp_cdc_enable_table is run)
Create Table [t_cdc_ta]
(
    [ID] [int] identity(1, 1) primary key not null,
    [Name] [varchar](20) null,
    [ADDR] [varchar](20) null,
    [TTime] [datetime] null
)
2. Tracking Change Data
When rows are inserted, updated, or deleted in the source table t_cdc_ta, the corresponding change rows can be seen in the change data table [cdc].[dbo_t_cdc_ta_ct]:
__$operation:
1 = delete, 2 = insert, 3 = update (before image), 4 = update (after image)
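As a quick sanity check, a few test changes can be made and the change table queried directly; the values below are arbitrary, and the captured rows may take a moment to appear because the capture job reads the transaction log asynchronously:
-- Generate some tracked changes
Insert into t_cdc_ta (Name, ADDR, TTime) values ('a', 'addr1', getdate())
Update t_cdc_ta set ADDR = 'addr2' where Name = 'a'
Delete from t_cdc_ta where Name = 'a'
-- Inspect the captured rows and their operation codes
Select __$operation, __$start_lsn, ID, Name, ADDR, TTime
From cdc.dbo_t_cdc_ta_ct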
3. Using ETL to incrementally update data based on changed data
The script is as follows:
Use [db1]
Go
Create Table [dbo].[cdc_capture_log](
    [cdc_capture_log_id] [int] identity(1, 1) not null,
    [capture_instance] [nvarchar](50) not null,
    [start_time] [datetime] not null,
    [min_lsn] [binary](10) not null,
    [max_lsn] [binary](10) not null,
    [end_time] [datetime] null,
    [status_code] [int] not null default(0)
)
Go
Create procedure [dbo].[usp_init_cdc_capture_log]
    @capture_instance nvarchar(50)
As
Begin
    Set nocount on;
    Declare @start_lsn binary(10),
            @end_lsn binary(10),
            @prev_max_lsn binary(10)
    -- Get the max lsn for the capture instance from the last extract
    Select @prev_max_lsn = max(max_lsn)
    From dbo.cdc_capture_log
    Where capture_instance = @capture_instance
    -- If no row is found in cdc_capture_log, get the min lsn for the capture instance
    If @prev_max_lsn is null
        Set @start_lsn = sys.fn_cdc_get_min_lsn(@capture_instance)
    Else
        Set @start_lsn = sys.fn_cdc_increment_lsn(@prev_max_lsn)
    -- Get the max lsn
    Set @end_lsn = sys.fn_cdc_get_max_lsn()
    If @start_lsn >= @end_lsn
        Set @start_lsn = @end_lsn
    Insert into dbo.cdc_capture_log
    (
        capture_instance,
        start_time,
        min_lsn,
        max_lsn
    )
    Values (
        @capture_instance,
        getdate(),
        @start_lsn,
        @end_lsn
    )
    Select cast(scope_identity() as int) as cdc_capture_log_id
End
Go
Create procedure [dbo].[usp_extract_userm_capture_log]
    @cdc_capture_log_id int
As
Begin
    Set nocount on;
    Declare @start_lsn binary(10), @end_lsn binary(10)
    -- Get the lsn range to process
    Select @start_lsn = min_lsn, @end_lsn = max_lsn
    From dbo.cdc_capture_log
    Where cdc_capture_log_id = @cdc_capture_log_id
    -- Extract and return the changes
    Select m.tran_end_time as modified_ts,
           x.*
    From cdc.fn_cdc_get_all_changes_dbo_t_cdc_ta(@start_lsn, @end_lsn, 'all') x
    Join cdc.lsn_time_mapping m on m.start_lsn = x.__$start_lsn;
End
Go
Create procedure [dbo].[usp_end_cdc_capture_log]
    @cdc_capture_log_id int
As
Begin
    Set nocount on;
    Update dbo.cdc_capture_log
    Set end_time = getdate(),
        status_code = 1
    Where cdc_capture_log_id = @cdc_capture_log_id
End
Go
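One extract cycle with these three procedures might look like the sketch below; the INSERT ... EXEC pattern only captures the log id that usp_init_cdc_capture_log returns, and in practice the extract result would be consumed by the ETL package rather than returned to the console:
-- A minimal sketch of one extract run, assuming the default capture instance name 'dbo_t_cdc_ta'
Declare @t table (cdc_capture_log_id int)
Declare @log_id int
-- Register the lsn range for this run and remember its log id
Insert into @t
Exec dbo.usp_init_cdc_capture_log @capture_instance = 'dbo_t_cdc_ta'
Select @log_id = cdc_capture_log_id from @t
-- Pull the changes for that lsn range
Exec dbo.usp_extract_userm_capture_log @cdc_capture_log_id = @log_id
-- Mark the run as finished
Exec dbo.usp_end_cdc_capture_log @cdc_capture_log_id = @log_id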
-- Create a table with the same structure in another database, used as the target table for the synchronized data
Use montior
Go
Create Table [dbo].[t_cdc_ta](
    [ID] [int] primary key not null,
    [Name] [varchar](20) null,
    [ADDR] [varchar](20) null,
    [TTime] [datetime] null
)
Go
Create proc [dbo].[p_merge]
    @operation int,
    @ID int,
    @Name varchar(20),
    @ADDR varchar(20),
    @TTime datetime
As
-- Delete
If @operation = 1
Begin
    Delete from dbo.t_cdc_ta
    Where ID = @ID
End
Else if @operation = 2 -- Insert
Begin
    Insert into dbo.t_cdc_ta (ID, Name, ADDR, TTime)
    Values (@ID, @Name, @ADDR, @TTime)
End
Else if @operation = 4 -- Update
Begin
    Update dbo.t_cdc_ta
    Set Name = @Name, ADDR = @ADDR, TTime = @TTime
    Where ID = @ID
End
Go
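Driven by the __$operation value of each change row, a call for a single row would look roughly like this; the column values are purely illustrative:
-- Apply one change row whose __$operation is 2 (insert); values are examples only
Exec dbo.p_merge
    @operation = 2,
    @ID = 1,
    @Name = 'test',
    @ADDR = 'addr1',
    @TTime = '2010-05-20 10:00:00'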
Disable CDC
Exec sys.sp_cdc_disable_table
    @source_schema = 'dbo',
    @source_name = 't_cdc_ta',
    @capture_instance = 'all'
Exec sys.sp_cdc_disable_db
This gives a scheduled incremental synchronization: a job keeps reading the changes for each new lsn range and applies them to the target table. Of course, the synchronization has to run before the cleanup job removes the change data.
By default, enabling CDC creates two SQL Server Agent jobs:
cdc.db1_capture: captures the changes
cdc.db1_cleanup: cleans up the change data; by default it runs in the early hours of each morning and removes entries older than the retention period.
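To see the two jobs' current settings, or to lengthen how long change data is kept so the extract window is not cut short, the built-in job procedures can be used; the 7-day retention below is only an example value:
-- Show the capture and cleanup jobs and their settings (retention is in minutes)
Exec sys.sp_cdc_help_jobs
-- Example only: raise the cleanup retention to 7 days (10080 minutes)
Exec sys.sp_cdc_change_job @job_type = 'cleanup', @retention = 10080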
I saw someone use the SSIS Conditional Split component to synchronize this data. In my tests the changes have to be applied in order, so they cannot simply be split into separate data sets and executed independently.
For now I process the rows one by one with a loop container; hopefully there is a better way...