Http://bbs.chinaunix.net/thread-1753130-1-1.html
When a transaction commits, InnoDB calls innobase_commit in ha_innodb.cc; innobase_commit calls trx_commit_complete_for_mysql (trx0trx.c), which in turn calls log_write_up_to (log0log.c). In other words, when InnoDB commits a transaction, log_write_up_to is called to write the redo log.
In innobase_commit:
- if (all # if the transaction is committed
- || (!thd_test_options (THD, Option_not_autocommit | Option_begin)) {
Copy Code
The following code serializes transaction commits by limiting the number of concurrently committing threads to innobase_commit_concurrency:
- if (innobase_commit_concurrency > 0) {
- Pthread_mutex_lock (&commit_cond_m);
- commit_threads++;
- if (Commit_threads > Innobase_commit_concurrency) {
- commit_threads--;
- Pthread_cond_wait (&commit_cond,
- &COMMIT_COND_M);
- Pthread_mutex_unlock (&commit_cond_m);
- Goto retry;
- }
- else {
- Pthread_mutex_unlock (&commit_cond_m);
- }
- }
Copy Code
- Trx->flush_log_later = TRUE; # Prohibit flush Binlog to disk when doing commit operation
- Innobase_commit_low (TRX);
- Trx->flush_log_later = FALSE;
Copy Code
We skip over the innobase_commit_low call for now; next, trx_commit_complete_for_mysql is called to perform the log write.
- Trx_commit_complete_for_mysql (TRX); # start flushing the log
- Trx->active_trans = 0;
Copy Code
trx_commit_complete_for_mysql mainly checks the value of the system parameter srv_flush_log_at_trx_commit to decide how to call
log_write_up_to: do nothing, only write the redo log file, or write it and flush it to disk.
- if (!trx->must_flush_log_later) {
- /* do nothing */
- } else if (Srv_flush_log_at_trx_commit = = 0) {#flush_log_at_trx_commit = 0, transaction commit does not write redo log
- /* do nothing */
- } else if (Srv_flush_log_at_trx_commit = = 1) {#flush_log_at_trx_commit = 1, transaction commits write log and flush disk if flush mode is not srv_unix_nosync (This is not very familiar)
- if (Srv_unix_file_flush_method = = Srv_unix_nosync) {
- /* Write the log but don't flush it to disk */
- Log_write_up_to (LSN, Log_wait_one_group, FALSE);
- } else {
- /* Write The log to the log files and flush them to
- Disk */
- Log_write_up_to (LSN, Log_wait_one_group, TRUE);
- }
- } else if (Srv_flush_log_at_trx_commit = = 2) {# if it is 2, only write to the redo log file, without flushing it to disk
- /* Write the log but don't flush it to disk */
- Log_write_up_to (LSN, Log_wait_one_group, FALSE);
- } else {
- Ut_error;
- }
Copy Code
Then look at log_write_up_to.
- if (Flush_to_disk # if a flush to disk is requested, check whether the LSN already flushed to disk has reached the LSN of the current commit; if so, return
- && ut_dulint_cmp (LOG_SYS->FLUSHED_TO_DISK_LSN, LSN) >= 0) {
- Mutex_exit (& (Log_sys->mutex));
- Return
- }
- if (!flush_to_disk # if no flush to disk is requested, check whether the commit LSN has already been written to all redo log groups, or, when not waiting for all groups, to at least one group; if so, return
- && (ut_dulint_cmp (LOG_SYS->WRITTEN_TO_ALL_LSN, LSN) >= 0
- || (UT_DULINT_CMP (LOG_SYS->WRITTEN_TO_SOME_LSN, LSN)
- >= 0
- && wait! = log_wait_all_groups))) {
- Mutex_exit (& (Log_sys->mutex));
- Return
- }
- # the code below checks whether a log write is already in progress; if so, wait for it to complete
- if (Log_sys->n_pending_writes > 0) {
- if (Flush_to_disk # if required to flush to disk, if the LSN being flushed includes a commit LSN, just wait for the operation to complete
- && ut_dulint_cmp (LOG_SYS->CURRENT_FLUSH_LSN, LSN)
- >= 0) {
- Goto Do_waits;
- }
- if (!flush_to_disk # if it is brushed to redo log file then if the LSN of the commit is included in the write LSN, just wait.
- && ut_dulint_cmp (LOG_SYS->WRITE_LSN, LSN) >= 0) {
- Goto Do_waits;
- }
- ......
- if (!flush_to_disk # if no I/O is pending and no flush to disk is needed, then if the next write position has already reached buf_free, the write has effectively been completed, so return directly
- && Log_sys->buf_free = = log_sys->buf_next_to_write) {
- Mutex_exit (& (Log_sys->mutex));
- Return
- }
Copy Code
The following code takes the first log group, sets the write- and flush-related bookkeeping fields, and computes the block-aligned start and end positions of the area to write
- log_sys->n_pending_writes++;
- Group = Ut_list_get_first (log_sys->log_groups);
- group->n_pending_writes++; /* We assume here and we have only
- One log group! */
- Os_event_reset (log_sys->no_flush_event);
- Os_event_reset (log_sys->one_flushed_event);
- Start_offset = log_sys->buf_next_to_write;
- End_offset = log_sys->buf_free;
- Area_start = Ut_calc_align_down (Start_offset, os_file_log_block_size);
- Area_end = Ut_calc_align (End_offset, os_file_log_block_size);
- Ut_ad (Area_end-area_start > 0);
- LOG_SYS->WRITE_LSN = log_sys->lsn;
- if (Flush_to_disk) {
- LOG_SYS->CURRENT_FLUSH_LSN = log_sys->lsn;
- }
Copy Code
The call to log_block_set_checkpoint_no sets the checkpoint number of the last block of the write area (the block containing end_offset) to the next checkpoint number stored in log_sys.
- Log_block_set_flush_bit (Log_sys->buf + Area_start, TRUE); # This doesn't look clear
- Log_block_set_checkpoint_no (
- Log_sys->buf + area_end-os_file_log_block_size,
- LOG_SYS->NEXT_CHECKPOINT_NO);
Copy Code
Copy the last block of the write area (which may be only partially filled, up to end_offset) one block length forward, into the next free block of the log buffer
- ut_memcpy (Log_sys->buf + area_end,
- Log_sys->buf + area_end-os_file_log_block_size,
- Os_file_log_block_size);
Copy Code
For each log group, call log_group_write_buf to write the redo log buffer
- while (group) {
- Log_group_write_buf (
- Group, Log_sys->buf + Area_start,
- Area_end-area_start,
- Ut_dulint_align_down (LOG_SYS->WRITTEN_TO_ALL_LSN,
- Os_file_log_block_size),
- Start_offset-area_start);
- Log_group_set_fields (group, LOG_SYS->WRITE_LSN); # Calculate the LSN and offset for this write to set GROUP->LSN and Group->lsn_offset
- Group = Ut_list_get_next (log_groups, group);
- }
- ......
- if (Srv_unix_file_flush_method = = Srv_unix_o_dsync) {# What's this stuff?
- /* O_dsync means the OS did not buffer the log file at all:
- So we had also flushed to disk and we have written */
- LOG_SYS->FLUSHED_TO_DISK_LSN = log_sys->write_lsn;
- } else if (Flush_to_disk) {
- Group = Ut_list_get_first (log_sys->log_groups);
- Fil_flush (group->space_id); # Last Call Fil_flush execution flush to disk
- LOG_SYS->FLUSHED_TO_DISK_LSN = log_sys->write_lsn;
- }
Copy Code
Next, let's see what log_group_write_buf does.
The offset calculation, which uses log_group_calc_size_offset, takes the LSN last recorded for the group (note that the group's log files form a ring buffer) and computes the difference between the requested LSN and that recorded LSN
- # call log_group_calc_size_offset to convert group->lsn_offset into an offset that excludes the log file headers; e.g. if lsn_offset falls in the 3rd log file, 3 * LOG_FILE_HDR_SIZE must be subtracted
- Gr_lsn_size_offset = (Ib_longlong)
- Log_group_calc_size_offset (Group->lsn_offset, group);
- Group_size = (ib_longlong) log_group_get_capacity (group); # calculates the size of the data portion of the group after all log_file_hdr_size lengths are removed
- # below is a typical differential calculation for ring structures
- if (ut_dulint_cmp (LSN, GR_LSN) >= 0) {
- difference = (Ib_longlong) ut_dulint_minus (LSN, GR_LSN);
- } else {
- difference = (Ib_longlong) ut_dulint_minus (GR_LSN, LSN);
- difference = difference% Group_size;
- difference = group_size-difference;
- }
- Offset = (gr_lsn_size_offset + difference)% Group_size;
- # finally, add back the size of each log file header and return the real offset
- Return (Log_group_calc_real_offset ((ulint) offset, group));
Copy Code
Then look at what follows:
- # if the write would run past the end of the current log file
- if ((next_offset% group->file_size) + len > Group->file_size) {
- Write_len = group->file_size # writes to the end of file
- -(next_offset% group->file_size);
- } else {
- Write_len = Len; # otherwise, write all len bytes in one go
- }
- # finally, the buffer contents are actually written; when the write starts a new log file, that file's header must be written first
- if ((next_offset% group->file_size = = log_file_hdr_size)
- && Write_header) {
- /* We start to write a new log file instance in the group */
- Log_group_file_header_flush (Group,
- Next_offset/group->file_size,
- START_LSN);
- srv_os_log_written+= os_file_log_block_size;
- srv_log_writes++;
- }
- # call Fil_io to execute buffer write
- if (log_do_write) {
- log_sys->n_log_ios++;
- srv_os_log_pending_writes++;
- Fil_io (Os_file_write | Os_file_log, TRUE, group->space_id,
- Next_offset/univ_page_size,
- Next_offset% univ_page_size, Write_len, buf, group);
- srv_os_log_pending_writes--;
- srv_os_log_written+= Write_len;
- srv_log_writes++;
- }
Copy Code
Redo Log Write and flush