Use a gene ID to look up the gene's position and strand in a GTF file, then extract the corresponding sequence from a genome FASTA file.

Source: Internet
Author: User

#!/usr/bin/env python
def splic_seq_2(Fa, r_id_, g_id_, position_1, position_2, strand):
    """Print (and return) the sequence of one GTF feature from a FASTA file.

    The FASTA file is scanned for the record whose identifier (the first
    whitespace-delimited token after ``>``) equals ``r_id_``.  When found,
    two lines are written to stdout: the record's header line followed by
    tab-separated ``g_id_``, ``position_1``, ``position_2`` and ``strand``;
    then the extracted sequence.

    Args:
        Fa: Path of the FASTA file to read.
        r_id_: Identifier of the FASTA record (GTF column 1, "seqname").
        g_id_: Gene identifier; only echoed in the printed header line.
        position_1: Feature start, 1-based inclusive (GTF column 4).
            A string or an int.
        position_2: Feature end, 1-based inclusive (GTF column 5).
            A string or an int.
        strand: ``'+'`` or ``'-'`` (GTF column 7).  For any other value
            only the header line is printed.

    Returns:
        The extracted sequence, or ``None`` when no record matches
        ``r_id_`` or ``strand`` is neither ``'+'`` nor ``'-'``.

    Raises:
        ValueError: If a position cannot be converted to ``int``.
        ImportError: For ``'-'`` features when the project-local module
            ``anti_`` is not importable.
    """
    header = None
    chunks = []
    # The context manager guarantees the handle is closed on every path.
    with open(Fa) as sequence_file:
        for seq_line in sequence_file:
            if seq_line.startswith('>'):
                if header is not None:
                    # Next record begins: the wanted record is complete.
                    break
                # Compare the record identifier exactly; a substring test
                # would let "1" match ">10" or ">chr21".
                tokens = seq_line[1:].split()
                if tokens and tokens[0] == r_id_:
                    header = seq_line.rstrip()
            elif header is not None:
                chunks.append(seq_line.rstrip())

    if header is None:
        return None

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('\t'.join([header, g_id_, str(position_1), str(position_2), strand]))

    if strand not in ('+', '-'):
        return None

    # GTF coordinates are 1-based and inclusive on the forward strand, so
    # the Python slice starts at start - 1 (clamped so that a start of 0
    # cannot wrap around to the end of the sequence).
    start = int(position_1)
    end = int(position_2)
    segment = ''.join(chunks)[max(start - 1, 0):end]

    if strand == '-':
        # Imported lazily so '+' features do not require the helper module.
        import anti_
        # NOTE(review): aq_antisense_strand presumably returns the base-wise
        # complement without reversing (the original code reversed its
        # output separately) -- confirm against the anti_ module.
        # The coordinates refer to the forward strand, so the region is cut
        # out first and only then complemented and reversed.
        segment = anti_.aq_antisense_strand(segment).rstrip()[::-1]

    print(segment)
    return segment

def splice_seq_1(GTF, ID, FA):
    """Call ``splic_seq_2`` for every GTF line that mentions a wanted ID.

    Args:
        GTF: Path of the tab-separated GTF annotation file.
        ID: Path of a text file with one identifier per line.
        FA: Path of the genome FASTA file, passed through to
            ``splic_seq_2``.

    For each GTF data line, and for each identifier (in ID-file order) that
    occurs in that line, ``splic_seq_2`` is called with the line's column 1
    (seqname), the identifier, columns 4 and 5 (start, end) and column 7
    (strand).
    """
    # Read the identifiers once rather than re-reading the file for every
    # GTF line.  Blank lines are dropped: an empty string is a substring of
    # every line and would match the whole annotation.
    with open(ID) as ge_id:
        wanted_ids = [entry.rstrip() for entry in ge_id]
    wanted_ids = [entry for entry in wanted_ids if entry]

    with open(GTF) as gtf_content:
        for line in gtf_content:
            if line.startswith('#'):
                continue  # header/comment line, no feature columns
            line_list = line.split('\t')
            if len(line_list) < 7:
                continue  # not a complete feature line; column 7 is needed
            for g_id_ in wanted_ids:
                # NOTE(review): this is a substring test against the whole
                # line, so "gene1" also matches a line about "gene10".
                if g_id_ in line:
                    splic_seq_2(
                        FA,
                        line_list[0].rstrip(),
                        g_id_,
                        line_list[3].rstrip(),
                        line_list[4].rstrip(),
                        line_list[6].rstrip(),
                    )
if __name__ == '__main__':
    # Command-line entry point: parse the three input paths and run the
    # extraction.
    from optparse import OptionParser

    ms_usage = 'usage: %prog [-g] gtf.file [-i] gene-id.file [-f] fasta.file'
    descr = ('Use this script to according to the gene-id to find the '
             'corresponding sequences from fasta.file base on the position '
             'and antisense/positive-strand descripted in gtf.file.')
    optpar = OptionParser(usage=ms_usage, description=descr)
    optpar.add_option('-g', '--gtf.file', dest='gtf_file',
                      help='input the anotition-file (filename.gtf).')
    optpar.add_option('-i', '--gene-id.file', dest='gene_id',
                      help='input the gene-id file contain the gene ID '
                           'which you want to extract.')
    optpar.add_option('-f', '--genome.fa', dest='fasta_seq',
                      help='input the genome-fasta that comtained the whole '
                           'sequences')
    options, args = optpar.parse_args()

    # All three files are needed; fail with the usage message instead of
    # passing None to open() further down.
    if not (options.gtf_file and options.gene_id and options.fasta_seq):
        optpar.error('the -g, -i and -f options are all required')

    splice_seq_1(options.gtf_file, options.gene_id, options.fasta_seq)



Use a gene ID to look up the gene's position and strand in a GTF file, then extract the corresponding sequence from a genome FASTA file.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please email us, and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.