Since several people have asked how to batch-convert CDS sequences into peptide (pep) sequences, I have extracted a piece of code from another script. It is deliberately split into two subroutines so that each can be used separately. For example, if the input sequences are in PHY format rather than FASTA format, you only need to adjust the cds2pep subroutine; the code subroutine does not need to be modified.
The format of the input file required by this code is FASTA.
Usage: perl cds2pep.pl input.cds.fa out.pep.fa
#!/usr/bin/perl
use strict;
use warnings;

# cds2pep.pl - translate every CDS record of a FASTA file into a peptide record.
#
# Usage: perl cds2pep.pl <input.cds.fa> <out.pep.fa>
#
# The script is split into two subroutines on purpose:
#   cds2pep() handles the file format (FASTA in, FASTA out);
#   code()    only supplies the codon table(s).
# Supporting another input format therefore only requires changing cds2pep().

# Run the driver only when this file is executed directly, so that the
# subroutines below can also be loaded with require/do without side effects.
main(@ARGV) unless caller;

# main(@args)
#   Command-line driver: expects exactly two arguments (input CDS FASTA,
#   output peptide FASTA) and dies with a usage message otherwise.
sub main {
    my @args = @_;
    die "#usage;perl $0 <input.cds.fa><out.pep.fa>\n" unless @args == 2;
    my ($incds, $outpep) = @args;

    ### output the pep sequences ###
    cds2pep($incds, $outpep);
    return;
}

##################################################
# subroutines
##################################################

# cds2pep($infile, $outfile)
#   Reads FASTA records from $infile, translates each sequence with the
#   "standard" table returned by code(), and writes the peptides to $outfile
#   as FASTA with 50 residues per line.  Each output header is the input
#   header followed by " [translate_table: standard]".
#
#   - Codons are upper-cased before lookup; codons missing from the table
#     (e.g. containing N) are written as 'X'.
#   - An incomplete trailing codon (1 or 2 bases) is ignored.
#   - Stop codons are written as 'U'; a single terminal stop is removed.
#
#   Dies if either file cannot be opened or the output cannot be closed.
#   Returns 1 on success.
sub cds2pep {
    my ($infile, $outfile) = @_;

    # 'or' (not '||') so that the die applies to open() itself and not to
    # the file-name operand.
    open my $in,  '<', $infile  or die "Cannot open '$infile' for reading: $!\n";
    open my $out, '>', $outfile or die "Cannot open '$outfile' for writing: $!\n";

    my $table = code()->{"standard"};

    # Discard everything up to and including the first '>'.
    {
        local $/ = ">";
        my $skipped = <$in>;
    }

    # From here on the handle is always positioned just after a '>':
    # one ordinary line is the header, then everything up to the next '>'
    # is the (possibly multi-line) sequence.
    while (defined(my $head = <$in>)) {
        chomp $head;
        $head =~ s/\r\z//;    # tolerate CRLF line endings

        my $seq = do {
            local $/ = ">";
            my $chunk = <$in>;
            $chunk = '' unless defined $chunk;    # header with no sequence at EOF
            chomp $chunk;                         # drop the next record's '>'
            $chunk;
        };
        $seq =~ s/\s+//g;     # join lines; also removes stray \r and blanks

        my $pep = _translate($seq, $table);
        $pep =~ s/U$//;       # remove the terminal stop codon

        # Break into lines of 50 residues; the look-ahead avoids a dangling
        # newline when the length is an exact multiple of 50.
        $pep =~ s/([A-Z]{50})(?=[A-Z])/$1\n/g;

        print {$out} ">$head [translate_table: standard]\n$pep\n";
    }

    close $in;
    close $out or die "Cannot close '$outfile': $!\n";
    return 1;
}

# _translate($seq, $table)
#   Private helper: translates the nucleotide string $seq codon by codon using
#   the hash reference $table (codon => one-letter residue).  Always returns a
#   defined string (empty for sequences shorter than one codon).
sub _translate {
    my ($seq, $table) = @_;

    my $pep = '';
    for (my $i = 0; $i + 3 <= length $seq; $i += 3) {
        my $codon = uc substr($seq, $i, 3);
        $pep .= exists $table->{$codon} ? $table->{$codon} : "X";
    }
    return $pep;
}

#####################################

# code()
#   Returns a hash reference of translation tables keyed by table name.
#   Each table maps an upper-case DNA codon to a one-letter residue code;
#   stop codons map to 'U'.  Only the "standard" table is defined so far.
sub code {
    my $p = {
        "standard" => {
            'GCA' => 'A', 'GCC' => 'A', 'GCG' => 'A', 'GCT' => 'A',    # Alanine
            'TGC' => 'C', 'TGT' => 'C',                                # Cysteine
            'GAC' => 'D', 'GAT' => 'D',                                # Aspartic acid
            'GAA' => 'E', 'GAG' => 'E',                                # Glutamic acid
            'TTC' => 'F', 'TTT' => 'F',                                # Phenylalanine
            'GGA' => 'G', 'GGC' => 'G', 'GGG' => 'G', 'GGT' => 'G',    # Glycine
            'CAC' => 'H', 'CAT' => 'H',                                # Histidine
            'ATA' => 'I', 'ATC' => 'I', 'ATT' => 'I',                  # Isoleucine
            'AAA' => 'K', 'AAG' => 'K',                                # Lysine
            'CTA' => 'L', 'CTC' => 'L', 'CTG' => 'L', 'CTT' => 'L',
            'TTA' => 'L', 'TTG' => 'L',                                # Leucine
            'ATG' => 'M',                                              # Methionine
            'AAC' => 'N', 'AAT' => 'N',                                # Asparagine
            'CCA' => 'P', 'CCC' => 'P', 'CCG' => 'P', 'CCT' => 'P',    # Proline
            'CAA' => 'Q', 'CAG' => 'Q',                                # Glutamine
            'CGA' => 'R', 'CGC' => 'R', 'CGG' => 'R', 'CGT' => 'R',
            'AGA' => 'R', 'AGG' => 'R',                                # Arginine
            'TCA' => 'S', 'TCC' => 'S', 'TCG' => 'S', 'TCT' => 'S',
            'AGC' => 'S', 'AGT' => 'S',                                # Serine
            'ACA' => 'T', 'ACC' => 'T', 'ACG' => 'T', 'ACT' => 'T',    # Threonine
            'GTA' => 'V', 'GTC' => 'V', 'GTG' => 'V', 'GTT' => 'V',    # Valine
            'TGG' => 'W',                                              # Tryptophan
            'TAC' => 'Y', 'TAT' => 'Y',                                # Tyrosine
            'TAA' => 'U', 'TAG' => 'U', 'TGA' => 'U',                  # Stop
        },
        ## more translate tables could be added here in future
    };
    return $p;
}

1;
__END__
From CDS to pep