Python-filter Mysql slow logs and python-filter mysql logs
##=================================================== ======================================== ##
First joke:
My new girlfriend asked me to go to his house to play. I thought it was not good to be empty-handed, so I told her that I had to buy something. Who knows? She suddenly said, "Don't buy, There is a box at home!
##=================================================== ======================================== ##
When I was tossing a MySQL with a disk performance problem yesterday, I found tens of thousands of lines of slow logs in just 10 minutes according to the monitoring results. I am not familiar with the slow log structure, open a 600 + MB file and drag it to the scroll bar. A lot of slow SQL statements have hundreds of rows. It seems that the root cause of the problem has not been found. At last, I tried my best to find it inadvertently, otherwise, let me grow old in ignorance ???
The SQL is as follows:
Impact:
Suspected causes:
Because the subquery does not have any filtering conditions, the full table scan is performed, and the server memory is insufficient. As a result, the scanned data is written to a temporary file, and other services are running on the server, the server cannot process all requests in a timely manner, resulting in a backlog. The backlog of requests increases the pressure on the server, leading to a vicious circle of mutual influences.
I don't know why the R & D is so cool. Anyway, the Mysql statement optimization processing capability is so poor. Please try to simplify the SQL statement as much as possible.
##=================================================== ================== ##
To put it bluntly, because the execution frequency of this SQL statement is low, the pt tool cannot be used for analysis to locate the SQL statement accurately. However, it seems quite painful to use it. Therefore, write a small tool for processing, it can also solve the problem of inconvenient reading of slow logs.
The python script is as follows:
# coding: utf-8import datetimefrom optparse import OptionParserMAX_QUERY_TIME_SECONDS = 10START_DATETIME = datetime.datetime.minEND_DATETIME = datetime.datetime.maxSLOW_LOG_PATH = ""class EventInfo(object): def __init__(self): self.exec_time = None self.login_user = "" self.query_time_seconds = 0 self.lock_time_seconds = 0 self.rows_sent = 0 self.rows_examined = 0 self.command_text = "" self.file_line_num = 0def get_datetime_by_time_line(time_line): if time_line.startswith("# Time:"): time_str = "20" + time_line.replace("# Time:", "").replace(" ", " ").strip() return datetime.datetime.strptime(time_str, "%Y%m%d %H:%M:%S") else: return Nonedef set_query_info(event_info, query_info_line): line = query_info_line if line.startswith("# Query_time:"): tmp_line = line.replace("#", "").replace(": ", ":").replace(" ", " ").replace(" ", " ") tmp_items = tmp_line.split(" ") for tmp_item in tmp_items: # print(tmp_item) if tmp_item.strip().startswith("Query_time:"): event_info.query_time_seconds = float(tmp_item.replace("Query_time:", "")) if tmp_item.strip().startswith("Lock_time:"): event_info.lock_time_seconds = float(tmp_item.replace("Lock_time:", "")) if tmp_item.strip().startswith("Rows_sent:"): event_info.rows_sent = float(tmp_item.replace("Rows_sent:", "")) if tmp_item.strip().startswith("Rows_examined:"): event_info.rows_examined = float(tmp_item.replace("Rows_examined:", ""))def set_login_user(event_info, login_user_line): line = login_user_line if line.startswith("# User@Host:"): event_info.login_user = line.replace("# User@Host:", "").strip()def set_command_text(event_info, command_text_line): line = command_text_line event_info.command_text += chr(13) + linedef print_event_info(event_info): print_info = """file_line_num: {7}Time: {0}User@Host: {1}Query_time: {2:.5f} secondsLock_time: {3:.5f} secondsRows_sent: {4}Rows_examined: {5}command_text:{6}""".format(event_info.exec_time, event_info.login_user, event_info.query_time_seconds, event_info.lock_time_seconds, event_info.rows_sent, event_info.rows_examined, event_info.command_text, event_info.file_line_num) print("*" * 70) print(print_info)def get_user_paras(): try: opt = OptionParser() opt.add_option('--slow_log', dest='slow_log', type=str, help='the file path of the slow log ') opt.add_option('--long_query_seconds', dest='long_query_seconds', type=float, help='long query seconds') opt.add_option('--start_datetime', dest="start_datetime", type=str, help="start datetime, format is %Y-%m-%d %H:%M:%S") opt.add_option('--end_datetime', dest="end_datetime", type=str, help="end datetime, format is %Y-%m-%d %H:%M:%S") (options, args) = opt.parse_args() error_messages = [] slow_log = options.slow_log start_datetime = options.start_datetime long_query_seconds = options.long_query_seconds end_datetime = options.end_datetime if not slow_log: error_messages.append("slow_log must be set;") else: global SLOW_LOG_PATH SLOW_LOG_PATH = slow_log if not long_query_seconds: error_messages.append("long_query_seconds must be set;") else: global MAX_QUERY_TIME_SECONDS MAX_QUERY_TIME_SECONDS = long_query_seconds try: if start_datetime is not None: tmp_start_datetime = datetime.datetime.strptime(start_datetime, "%Y-%m-%d %H:%M:%S") global START_DATETIME START_DATETIME = tmp_start_datetime except: error_messages.append("the format of start_datetime was wrong ;") try: if end_datetime is not None: tmp_end_datetime = datetime.datetime.strptime(end_datetime, "%Y-%m-%d %H:%M:%S") global END_DATETIME END_DATETIME = tmp_end_datetime except: error_messages.append("the format of end_datetime was wrong ;") if len(error_messages) > 0: opt.print_help() for error_message in error_messages: print(error_message) return False else: return True except Exception as ex: print("exception :{0}".format(str(ex))) return Falsedef read_log(): global MAX_QUERY_TIME_SECONDS file_path = SLOW_LOG_PATH file_handler = open(file_path) line = file_handler.readline() line_num = 1 find_event_count = 0 current_event_info = EventInfo() current_time = "" has_time = False while line: if line.startswith("# Time:"): current_time = get_datetime_by_time_line(line) if current_time < START_DATETIME: line = file_handler.readline() line_num += 1 continue if current_time > END_DATETIME: break line = file_handler.readline() line_num += 1 has_time = True continue if has_time: if line.startswith("# User@Host:"): if current_event_info.query_time_seconds > MAX_QUERY_TIME_SECONDS: print_event_info(event_info=current_event_info) find_event_count += 1 current_event_info = EventInfo() current_event_info.file_line_num = line_num current_event_info.exec_time = current_time set_login_user(event_info=current_event_info, login_user_line=line) elif line.startswith("# Query_time:"): set_query_info(event_info=current_event_info, query_info_line=line) else: set_command_text(event_info=current_event_info, command_text_line=line) line = file_handler.readline() line_num += 1 print("*" * 70) print("parse completely") print("find {0} events".format(find_event_count))def main(): if get_user_paras(): read_log()if __name__ == '__main__': main()
The script call method is as follows:
C:\Python27\python.exe D:/GGA/Code/slow_query_find/find_slow_quer.py --slow_log="D:\slow.log.2016010900" --long_query_seconds=4 --start_datetime="2016-01-08 15:46:00" --end_datetime="2016-01-08 15:50:00"
The displayed result is as follows:
##=================================================== ======================================== ##