1. Reads PE length calculation script
01. cal_pe_depth.sh
Read_count=`gzip -dc Reads1.fq.gz |wc -l |awk ‘{print $1/4}‘`echo "read pair count : $Read_count"average_depth=`expr $Read_count \* 200 / 6000000 `echo average depth: $average_depth
2. Calculate reads coverage
02. Coverage. r
depth<-(1599999*100*2/6e6)print(depth)coverage<-ppois(40,lambda=depth,lower=F)print(coverage)
3. Calculate n50
03. cal_n50.pl
#!/usr/bin/perluse strict;my $file = shift;open (IN,"$file") or die ("can‘t open the file!\n");open (OUT,">scaf_sort_length.txt") or die ("can‘t create the file!\n");my $total_len = 0;my $ref_len = 6e6;my $flag = 1;my @count_len;my %scaf_hash;$/=">";<IN>;while(<IN>){ chomp; my ($name,@seq) = split(/\n/,$_); my $scafseq = join("",@seq); $total_len += length($scafseq); my $ID = (split(/\s+/,$name))[0]; $scaf_hash{$ID}=length($scafseq);}my $count = 0;foreach my $key (sort {$scaf_hash{$b}<=>$scaf_hash{$a}} keys %scaf_hash){ print OUT "$scaf_hash{$key}\n"; push(@count_len,$scaf_hash{$key});}foreach my $len (@count_len){ $count += $len; if ($count /$total_len >=0.5 && $flag){ print "N50: $len\n"; $flag = 0; } if ($count/$total_len>=0.9){ print "N90: $len\n"; last; }}close IN;close OUT;
4. Length cumulative curve distribution
04. scaf_acclen_graph.r
data<-read.table("scaf_sort_length.txt",header=F)len<-data[,1]head(len)sum<-0acclen<-numeric(length(len))for (i in 1:nrow(data)){ sum <-sum + len[i]; acclen[i]<-sum;}head(acclen)pdf("Length.pdf")plot(x=(1:length(acclen)),y=acclen,xlab="accseq ID",ylab="acclen",type="l",col="blue",main="scaSeq accumulate length grap")dev.off()
Common scripts for mail analysis (1)