1. View TCP connection status
netstat -nat | awk '{print $6}' | sort | uniq -c | sort -rn
netstat -n | awk '/^tcp/ {++S[$NF]}; END {for(a in S) print a, S[a]}'
netstat -n | awk '/^tcp/ {++state[$NF]}; END {for(key in state) print key,"\t",state[key]}'
netstat -n | awk '/^tcp/ {++arr[$NF]}; END {for(k in arr) print k,"\t",arr[k]}'
netstat -n | awk '/^tcp/ {print $NF}' | sort | uniq -c | sort -rn
netstat -ant | awk '{print $NF}' | grep -v '[a-z]' | sort | uniq -c
netstat -ant | awk '/ip:80/{split($5,ip,":");++S[ip[1]]}END{for(a in S) print S[a],a}' | sort -n
netstat -ant | awk '/:80/{split($5,ip,":");++S[ip[1]]}END{for(a in S) print S[a],a}' | sort -rn | head -n 10
awk 'BEGIN{printf ("http_code\tcount_num\n")}{COUNT[$10]++}END{for(a in COUNT) printf a"\t\t"COUNT[a]"\n"}' access.log
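On newer systems where netstat is deprecated, the same state summaries can be produced with ss from iproute2 (a minimal sketch, assuming ss is installed):
# Count TCP connections by state with ss; NR>1 skips the header line
ss -ant | awk 'NR>1 {++s[$1]} END {for (k in s) print k, s[k]}' | sort -k2 -rn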
2. Find the top 20 IPs by connection count (often used to locate attack sources):
netstat -anlp | grep 80 | grep tcp | awk '{print $5}' | awk -F: '{print $1}' | sort | uniq -c | sort -nr | head -n20
netstat -ant | awk '/:80/{split($5,ip,":");++A[ip[1]]}END{for(i in A) print A[i],i}' | sort -rn | head -n20
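To watch the list while an attack is in progress, the first one-liner can be re-run in a loop (a usage sketch; the 2-second interval is arbitrary):
# Refresh the top-20 list every 2 seconds; press Ctrl-C to stop
while true; do
    clear
    netstat -anlp | grep 80 | grep tcp | awk '{print $5}' | awk -F: '{print $1}' | sort | uniq -c | sort -nr | head -n20
    sleep 2
done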
3. Use tcpdump to sniff port 80 traffic and find which source IPs access it most
tcpdump -i eth0 -tnn dst port 80 -c 1000 | awk -F"." '{print $1"."$2"."$3"."$4}' | sort | uniq -c | sort -nr |head -20
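On a busy server it can be gentler to capture once to a file and analyze offline; a sketch under the same assumptions (interface eth0, 1000 packets; the /tmp path is an illustration):
# Capture 1000 packets to a file, then run the same awk pipeline over the saved capture
tcpdump -i eth0 -nn dst port 80 -c 1000 -w /tmp/port80.pcap
tcpdump -tnn -r /tmp/port80.pcap | awk -F"." '{print $1"."$2"."$3"."$4}' | sort | uniq -c | sort -nr | head -20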
4. Find the remote addresses with the most TIME_WAIT connections
netstat -n|grep TIME_WAIT|awk '{print $5}'|sort|uniq -c|sort -rn|head -n20
5. Find the source IPs with the most SYN connections
netstat -an | grep SYN | awk '{print $5}' | awk -F: '{print $1}' | sort | uniq -c | sort -nr | more
6. Find the process listening on a given port
netstat -ntlp | grep 80 | awk '{print $7}' | cut -d/ -f1
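Where lsof is available, an alternative that shows the listening process directly:
# List the process(es) holding a listening TCP socket on port 80 (run as root to see all users)
lsof -iTCP:80 -sTCP:LISTEN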
Website log analysis 1 (Apache):
1. Obtain the top 10 IP addresses
cat access.log | awk '{print $1}' | sort | uniq -c | sort -nr | head -10
cat access.log | awk '{counts[$(11)]+=1}; END {for(url in counts) print counts[url], url}'
2. List the 20 most-visited files or pages, and count the number of distinct visiting IPs
cat access.log | awk '{print $11}' | sort | uniq -c | sort -nr | head -20
awk '{print $1}' access.log | sort -n -r | uniq -c | wc -l
3. List the exe files transferred in the largest volumes (useful when analyzing download sites)
cat access.log |awk '($7~/\.exe/){print $10 " " $1 " " $4 " " $7}'|sort -nr|head -20
4. List exe files larger than 200000 bytes (about 200 KB) and how many times each was requested
cat access.log |awk '($10 > 200000 && $7~/\.exe/){print $7}'|sort -n|uniq -c|sort -nr|head -100
5. If the last column of the log records the page transfer time, list the pages that take longest to deliver to the client
cat access.log |awk '($7~/\.php/){print $NF " " $1 " " $4 " " $7}'|sort -nr|head -100
6. List the most time-consuming pages (taking more than 60 seconds) and how often they occur
cat access.log |awk '($NF > 60 && $7~/\.php/){print $7}'|sort -n|uniq -c|sort -nr|head -100
7. List objects whose transmission time exceeds 30 seconds
cat access.log |awk '($NF > 30){print $7}'|sort -n|uniq -c|sort -nr|head -20
8. Count total website traffic (in GB)
cat access.log |awk '{sum+=$10} END {print sum/1024/1024/1024}'
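The same sum with friendlier formatting (a cosmetic variant; still assumes $10 is the bytes field):
# Print the total rounded to two decimals with a unit label
cat access.log | awk '{sum+=$10} END {printf "%.2f GB\n", sum/1024/1024/1024}'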
9. Count 404 responses
awk '($9 ~/404/)' access.log | awk '{print $9,$7}' | sort
10. Count HTTP status codes.
cat access.log | awk '{counts[$(9)]+=1}; END {for(code in counts) print code, counts[code]}'
cat access.log | awk '{print $9}' | sort | uniq -c | sort -rn
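A variant that also prints each status code's share of all requests (assuming $9 is the status field, as above):
# Count per status code and compute its percentage of total requests
cat access.log | awk '{c[$9]++; n++} END {for (s in c) printf "%s\t%d\t%.2f%%\n", s, c[s], c[s]/n*100}' | sort -k2 -rn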
11. Requests per second (approximate concurrency):
awk '{if($9~/200|30|404/)COUNT[$4]++}END{for(a in COUNT) print a,COUNT[a]}' access.log | sort -k 2 -nr | head -n10
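To group by minute instead of by second, trim the timestamp field; a sketch assuming the common log format, where $4 looks like [10/Oct/2000:13:55:36:
# substr($4,2,17) keeps dd/Mon/yyyy:hh:mm, dropping the leading "[" and the seconds
awk '{if($9~/200|30|404/)COUNT[substr($4,2,17)]++}END{for(a in COUNT) print a,COUNT[a]}' access.log | sort -k 2 -nr | head -n10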
12. Bandwidth statistics
cat apache.log | awk '{if($7~/GET/) count++}END{print "client_request="count}'
cat apache.log | awk '{BYTE+=$11}END{print "client_kbyte_out="BYTE/1024"KB"}'
13. Count the number of objects and the average object size
cat access.log | awk '{byte+=$10}END{print byte/NR/1024, NR}'
cat access.log | awk '{if($9~/200|30/)COUNT[$NF]++}END{for(a in COUNT) print a,COUNT[a],NR,COUNT[a]/NR*100"%"}'
14. Extract a 5-minute window of logs
if [ $DATE_MINUTE != $DATE_END_MINUTE ]; then
    # Check whether the start timestamp equals the end timestamp; if not:
    START_LINE=`sed -n "/$DATE_MINUTE/=" $APACHE_LOG | head -n1`    # line number of the start timestamp
    # END_LINE=`sed -n "/$DATE_END_MINUTE/=" $APACHE_LOG | tail -n1`
    END_LINE=`sed -n "/$DATE_END_MINUTE/=" $APACHE_LOG | head -n1`  # line number of the end timestamp
    # Use the line numbers to extract the 5 minutes of log content into a temporary file
    sed -n "${START_LINE},${END_LINE}p" $APACHE_LOG > $MINUTE_LOG
    # Recover the start and end timestamps from those line numbers
    GET_START_TIME=`sed -n "${START_LINE}p" $APACHE_LOG | awk -F'[' '{print $2}' | awk '{print $1}' | sed 's#/# #g' | sed 's#:# #'`
    GET_END_TIME=`sed -n "${END_LINE}p" $APACHE_LOG | awk -F'[' '{print $2}' | awk '{print $1}' | sed 's#/# #g' | sed 's#:# #'`
fi
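For context, a minimal sketch of how the variables used above might be initialized; all names come from the fragment, but the paths and the GNU date invocation are assumptions:
APACHE_LOG=/var/log/httpd/access_log                     # log path assumed for illustration
MINUTE_LOG=/tmp/minute.log                               # temporary file for the extracted window
# Slashes are pre-escaped so the values are safe inside the sed /.../ addresses above
DATE_MINUTE=`date -d '-5 minutes' "+%d\/%b\/%Y:%H:%M"`   # window start (GNU date syntax)
DATE_END_MINUTE=`date "+%d\/%b\/%Y:%H:%M"`               # window end (now)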
15. Spider Analysis
Check which spiders and crawlers are fetching site content
/usr/sbin/tcpdump -i eth0 -l -s 0 -w - dst port 80 | strings | grep -i user-agent | grep -i -E 'bot|crawler|slurp|spider'
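If the access log uses the combined format, the same question can be answered without sniffing; a sketch assuming the user agent is the sixth double-quote-delimited field:
# Count requests per user agent matching common spider names
grep -i -E 'bot|crawler|slurp|spider' access.log | awk -F'"' '{print $6}' | sort | uniq -c | sort -rn | head -20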
Website log analysis 2 (Squid)
2. Aggregate traffic by domain
zcat squid_access.log.tar.gz | awk '{print $10,$7}' | awk 'BEGIN{FS="[ /]"}{trfc[$4]+=$1}END{for(domain in trfc){printf "%s\t%d\n",domain,trfc[domain]}}'
A more efficient Perl version can be downloaded from: http://docs.linuxtone.org/soft/tools/tr.pl
Database
1. View the SQL statements being executed against the database
/usr/sbin/tcpdump -i eth0 -s 0 -l -w - dst port 3306 | strings | egrep -i 'SELECT|UPDATE|DELETE|INSERT|SET|COMMIT|ROLLBACK|CREATE|DROP|ALTER|CALL'
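If you have credentials for the server, running statements can also be listed from MySQL itself rather than off the wire (an alternative, assuming the MySQL client tools are installed):
# Show the server's current connections and the statements they are executing
mysqladmin -u root -p processlist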
System Debug Analysis
1. Debug command: trace the system calls of a running process
strace -p pid
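A useful variant with standard strace flags: aggregate a per-syscall count/time summary instead of streaming every call (press Ctrl-C to print the table):
strace -c -p pid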
2. Attach a debugger to a specified process by PID
gdb -p pid
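Note that gdb stops the process while attached; a non-interactive sketch using standard gdb batch options that grabs a backtrace and detaches immediately:
# Print a one-shot backtrace of the target process, then exit
gdb -batch -ex "bt" -p pid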