#!/usr/bin/env python
def splic_seq_2(Fa, r_id_, g_id_, position_1, position_2, strand):
    """Print the slice of every FASTA record whose header contains ``r_id_``.

    The FASTA file is scanned record by record. For each header line that
    contains ``r_id_`` a tab-separated title line is printed (header,
    ``g_id_``, ``position_1``, ``position_2``, ``strand``). If ``strand`` is
    ``'+'`` or ``'-'`` the record's sequence lines are joined and the slice
    ``[int(position_1):int(position_2)]`` is printed on the next line.

    Args:
        Fa: Path of the FASTA file to scan.
        r_id_: Text searched for in each header line (substring match).
        g_id_: Gene identifier; only echoed in the printed title line.
        position_1: Slice start, as a string convertible with ``int``.
        position_2: Slice end, as a string convertible with ``int``.
        strand: ``'+'`` or ``'-'``. Any other value prints the title line
            only, without a sequence.

    Returns:
        None. All results are written to standard output.
    """
    to_antisense = None
    if strand == '-':
        # Project-local helper module; imported lazily because only the
        # minus-strand path uses it.
        import anti_
        to_antisense = anti_.aq_antisense_strand

    with open(Fa) as sequence_file:
        seq_line = sequence_file.readline()
        while seq_line:
            # NOTE(review): substring match, so 'chr1' also matches a
            # '>chr10' header -- confirm whether an exact ID match is wanted.
            if r_id_ not in seq_line:
                # Not the wanted record: skip ahead to the next header line.
                seq_line = sequence_file.readline()
                while seq_line and '>' not in seq_line:
                    seq_line = sequence_file.readline()
                continue

            print('\t'.join(
                [seq_line.rstrip(), g_id_, position_1, position_2, strand]))
            seq_line = sequence_file.readline()
            if strand not in ('+', '-'):
                # Unknown strand: nothing to extract, carry on scanning.
                continue

            # Collect the record body up to the next header (or end of file).
            pieces = []
            while seq_line and '>' not in seq_line:
                if to_antisense is None:
                    pieces.append(seq_line.rstrip())
                else:
                    pieces.append(to_antisense(seq_line).rstrip())
                seq_line = sequence_file.readline()

            tgt_line = ''.join(pieces)
            if to_antisense is not None:
                # NOTE(review): the slice below is taken from the reversed
                # string, i.e. positions are counted from the record's far
                # end -- confirm this is the intended minus-strand region.
                tgt_line = tgt_line[::-1]
            # NOTE(review): this is a 0-based, end-exclusive Python slice.
            # If the positions are 1-based inclusive (as GTF coordinates
            # conventionally are), the first base is dropped -- confirm.
            print(tgt_line[int(position_1):int(position_2)])
def splice_seq_1(GTF, ID, FA):
    """Call ``splic_seq_2`` for every GTF line that mentions a listed gene ID.

    Each non-blank line of the ID file is treated as one gene ID. For every
    GTF line containing one of those IDs, columns 1, 4, 5 and 7 of the line
    (indices 0, 3, 4 and 6 after splitting on tabs) are passed to
    ``splic_seq_2`` together with the FASTA path and the ID.

    Args:
        GTF: Path of the tab-separated annotation file.
        ID: Path of the file holding one gene ID per line.
        FA: Path of the FASTA file handed on to ``splic_seq_2``.

    Returns:
        None. Output is produced by ``splic_seq_2``.
    """
    # Load the IDs once instead of rewinding and rescanning the ID file for
    # every GTF line. Blank lines are dropped: an empty string is a substring
    # of every line and would match every record.
    with open(ID) as id_file:
        gene_ids = [entry.rstrip() for entry in id_file]
    gene_ids = [gene for gene in gene_ids if gene]

    # The file is opened inside the function (not at module level) so that it
    # is opened exactly once per call and closed when the scan finishes.
    with open(GTF) as gtf_content:
        for line in gtf_content:
            if line.startswith('#'):
                # Comment/header line, carries no feature columns.
                continue
            line_list = line.split('\t')
            if len(line_list) < 7:
                # Too few columns to hold the strand field; skip it rather
                # than fail with IndexError.
                continue
            for g_id_ in gene_ids:
                # NOTE(review): substring match against the whole line, so
                # an ID that is a prefix of another ID also matches -- confirm.
                if g_id_ in line:
                    splic_seq_2(
                        FA,
                        line_list[0].rstrip(),
                        g_id_,
                        line_list[3].rstrip(),
                        line_list[4].rstrip(),
                        line_list[6].rstrip(),
                    )
if __name__ == '__main__':
    # Command-line entry point: parse the three input paths and run the
    # extraction.
    from optparse import OptionParser

    # optparse prepends "Usage: " itself, so the string starts at %prog.
    ms_usage = '%prog [-g] gtf.file [-i] gene-id.file [-f] fasta.file'
    descr = ('Use this script to according to the Gene-id to find the '
             'corresponding sequences from Fasta.file base on the position '
             'and antisense/positive-strand descripted in Gtf.file.')
    optpar = OptionParser(usage=ms_usage, description=descr)
    optpar.add_option('-g', '--gtf.file', dest='gtf_file',
                      help='Input the Anotition-file (FILENAME.GTF).')
    optpar.add_option('-i', '--gene-id.file', dest='gene_id',
                      help='Input the Gene-id file contain the gene ID '
                           'which you want to extract.')
    optpar.add_option('-f', '--genome.fa', dest='fasta_seq',
                      help='input the genome-fasta that comtained the '
                           'whole sequences')
    options, args = optpar.parse_args()

    # Original author's note: passing options.<attr> straight into
    # splice_seq_1 reportedly failed at the '.', hence the local variables.
    gtf_path = options.gtf_file
    id_path = options.gene_id
    fasta_path = options.fasta_seq

    # All three paths are required; report what is missing through the
    # parser instead of failing later inside open().
    missing = [flag for flag, value in (('-g', gtf_path),
                                        ('-i', id_path),
                                        ('-f', fasta_path)) if not value]
    if missing:
        optpar.error('missing required option(s): ' + ', '.join(missing))

    splice_seq_1(gtf_path, id_path, fasta_path)
# Summary: use each gene ID to look up the gene's position and strand in the GTF file, then extract the corresponding sequence.