First, merge all the FASTA sequences into a single file:
cat *.fasta > Result_all_sequences.fasta
Next, keep only the sequences that are at least 90 bp long (the script tests `>= 90`).
Then find the GCAT motif in each remaining sequence and keep the part of the sequence that follows it.
# Two-stage FASTA clean-up, run inside WORK_DIR:
#   1. keep the records of MERGED_FASTA that have at least MIN_LENGTH bases
#      and write them to LENGTH_FILTERED_FASTA;
#   2. for each kept record, discard everything up to and including the first
#      occurrence of MOTIF and write what follows to MOTIF_TRIMMED_FASTA.
# Both output files are written with each sequence on a single line.
from glob import glob  # imported by the original script; not used below
import os

WORK_DIR = "d:\\"
MERGED_FASTA = "Result_all_sequences.fasta"
LENGTH_FILTERED_FASTA = "Res.fasta"
MOTIF_TRIMMED_FASTA = "Ress.fasta"
MIN_LENGTH = 90
MOTIF = "GCAT"


def parse_fasta(fasta_text):
    """Split FASTA text into ``(header, sequence)`` pairs.

    A record starts at a line beginning with ``>``; the header is the rest of
    that line.  The following lines, up to the next header, are concatenated
    with all whitespace removed, so a wrapped sequence becomes one string and
    its length is its number of bases.  Lines that appear before the first
    header are ignored.  A header with no sequence lines yields an empty
    sequence instead of raising.
    """
    records = []
    header = None
    chunks = []
    for line in fasta_text.splitlines():
        if line.startswith(">"):
            if header is not None:
                records.append((header, "".join(chunks)))
            header = line[1:]
            chunks = []
        elif header is not None:
            # Drop every whitespace character so line wraps never count as
            # bases and never split a motif in two.
            chunks.append("".join(line.split()))
    if header is not None:
        records.append((header, "".join(chunks)))
    return records


def filter_by_length(records, min_length=MIN_LENGTH):
    """Return the records whose sequence has at least ``min_length`` bases."""
    return [
        (header, sequence)
        for header, sequence in records
        if len(sequence) >= min_length
    ]


def trim_after_motif(records, motif=MOTIF):
    """Return each record reduced to the bases that follow its first ``motif``.

    Records that do not contain ``motif`` are dropped, and so are records in
    which nothing follows the motif, because there would be no sequence left
    to write.  The search is case-sensitive.
    """
    trimmed = []
    for header, sequence in records:
        position = sequence.find(motif)
        if position == -1:
            continue
        remainder = sequence[position + len(motif):]
        if remainder:
            trimmed.append((header, remainder))
    return trimmed


def format_fasta(records):
    """Render ``(header, sequence)`` pairs as FASTA text, one line per sequence."""
    return "".join(
        f">{header}\n{sequence}\n" for header, sequence in records
    )


def main():
    """Run both stages on MERGED_FASTA inside WORK_DIR."""
    os.chdir(WORK_DIR)

    with open(MERGED_FASTA, "r") as merged:
        records = parse_fasta(merged.read())

    long_records = filter_by_length(records, MIN_LENGTH)
    with open(LENGTH_FILTERED_FASTA, "w") as filtered_out:
        filtered_out.write(format_fasta(long_records))

    with open(MOTIF_TRIMMED_FASTA, "w") as trimmed_out:
        trimmed_out.write(format_fasta(trim_after_motif(long_records, MOTIF)))


if __name__ == "__main__":
    main()
This article is from the "R and Python apps" blog; please keep this source link when reposting: http://matrix6ro.blog.51cto.com/1746429/1885915
Python: filter sequences by length, then find a specified base motif in each sequence