Linear Space Sequence Alignment

Source: Internet
Author: User
Tags rand random seed

For explanation to linear spaces sequence alignment, please refer to http://ai.stanford.edu/~serafim/CS262_2007/notes/ Lecture3.pdf.

The algorithm and equation I used is from the textbook algorithm design by Jon Kleinberg Andéva Tardos (2005):






The algorithm of Backward-space-efficient-alignment () is the reverse of space-efficient-alignment ().



The time cost is sequences from 10bp-510bp.


Memory cost of sequences from 10bp-510bp.

#include <iostream> #include <fstream> #include <cstring> #include <cmath> #include <time.h&
Gt #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include <unistd.h> using namespace St

D
const int match_cost=0;
const int mismatch_cost=2;

const int gap_cost=1; int memcnt=0; Counting memory allocated struct Btnode {int x;//x axis int y;//Y axis char type;//' M '-match, ' s '-mismatch, ' g '-gap char nt;

Valid neucleotide:x-sequence x, y-sequence y, b-both};
btnode* P; int pidx=0; 
	Index of P struct Region//Region in a sequence {int start;
int end;

};
	Generate random sequence//char* genrandseq (char* seq, int n) {string nt= "ATCG";
		for (int i=0;i<n;++i) {int idx=rand ()%4;
	SEQ[I]=NT[IDX];
return to SEQ; int min (int a,int b,int c) {return (A&LT;B?A:B) <c? (
A&LT;B?A:B): C;
	void Merge (btnode* input, long p, long R) {Long mid = floor (P + r)/2);
	Long i1 = 0;
	Long i2 = p; LonG i3 = mid + 1;
	Temp array btnode* temp=new btnode[r-p+1];

	Memcnt+=sizeof (Btnode) * (r-p+1);
			Merge in sorted form the 2 arrays while (I2 <= mid && i3 <= R) if (Input[i2].x < input[i3].x)
		temp[i1++] = input[i2++];

	else temp[i1++] = input[i3++];

	Merge the remaining elements in left array while (I2 <= mid) temp[i1++] = input[i2++];

	Merge the remaining elements in right array while (i3 <= r) temp[i1++] = input[i3++];

	Move from temp array to master array for (int i = p; I <= R i++) input[i] = temp[i-p];
delete [] temp;  }//Merge sort the Btnode array by the The (element x)////Inputs://p-the start index of array input//R-  The end index of the array input void Merge_sort (btnode* input, long p, long R) {if (P < r) {Long mid = floor (P +
		R)/2);
		Merge_sort (Input, p, mid);
		Merge_sort (Input, mid + 1, R);
	Merge (Input, p, R); }////Insert new node into P//void Insertnodetop (iNT x, int y) {bool isok=true;
	for (int i=0;i<pidx;++i) {if (x==p[i].x && y==p[i].y) Isok=false;
		} if (isOK) {p[pidx].x=x;
		P[pidx].y=y;
	pidx++; 
}////Align seqences////Recurrence:/OPT (I,J) =min{//Alpha (I,J) +opt (i-1,j-1),//Delta+opt (I-1,J),
Delta+opt (I,J-1)//}/Where mismatch cost alpha=2, match cost alpha=0, and Gap cost delta=1. Parameters://x,y-sequences//M-length of sequence X//N-length of sequence Y//int Alignment (char* X, Reg
	ion& Rx, char* Y, region& ry) {int m=rx.end-rx.start+1;

	int n=ry.end-ry.start+1; int** S=new int*[m+1];
	Temp array to store scores of alignment for (int i=0;i<m+1;++i) s[i]=new int[n+1];

	memcnt+=sizeof (int) * (m+1) * (n+1);
	for (int i=0;i<=m;++i) s[i][0]=i*gap_cost;
	
	for (int j=0;j<=n;++j) s[0][j]=j*gap_cost;
			for (int j=1;j<=n;++j) {for (int i=1;i<=m;++i) {int alpha;
			if (x[i-1]==y[j-1]) alpha=match_cost; else Alpha=miSmatch_cost;
		S[i][j]=min (Alpha+s[i-1][j-1],gap_cost+s[i-1][j],gap_cost+s[i][j-1]);
	}//End of A for//Trace back int ix=m;
	int iy=n; 	
	int bt_len=m+n;//length of array to record tracing back int cnt=bt_len-1; btnode* Bt=new Btnode[bt_len];
	Record BackTrace path memcnt+=sizeof (btnode) *bt_len;
		while (ix>=1 && iy>=1) {bt[cnt].x=ix-1;
		Bt[cnt].y=iy-1;
				
		Insertnodetop (rx.start+ix-1,ry.start+iy-1);
			if (S[ix][iy]==s[ix-1][iy-1]+match_cost && x[ix-1]==y[iy-1])//MATCH {bt[cnt].type= ' m ';
			Bt[cnt].nt= ' B ';
			ix--;
		iy--;
			else if (s[ix][iy]==s[ix-1][iy-1]+mismatch_cost) {//mismatch bt[cnt].type= ' S ';
			Bt[cnt].nt= ' B ';
			ix--;
		iy--;
			else if (s[ix][iy]==s[ix-1][iy]+gap_cost) {//GAP bt[cnt].type= ' g ';
			bt[cnt].nt= ' x ';
		ix--;
			else if (s[ix][iy]==s[ix][iy-1]+gap_cost) {//GAP bt[cnt].type= ' g ';
			Bt[cnt].nt= ' Y ';
		iy--;
	} cnt--;
		while (iy>0) {bt[cnt].x=ix-1;
		Bt[cnt].y=iy-1; InsertnodetoP (rx.start+ix-1,ry.start+iy-1);
			if (s[ix][iy]==s[ix][iy-1]+gap_cost) {//GAP bt[cnt].type= ' g ';
			Bt[cnt].nt= ' Y ';
		iy--;
	} cnt--;
		while (ix>0) {bt[cnt].x=ix-1;
		Bt[cnt].y=iy-1;
		
		Insertnodetop (rx.start+ix-1,ry.start+iy-1);
			if (s[ix][iy]==s[ix-1][iy]+gap_cost) {//GAP bt[cnt].type= ' g ';
			bt[cnt].nt= ' x ';
		ix--;
	} cnt--;
	/*//Print the alignment cnt++;
	TEST for (int i=cnt;i<bt_len;++i) {cout<< "(" <<bt[i].x<< "," <<bt[i].y<< ")";

	} cout<<endl; Print sequence x for (int i=cnt;i<bt_len;++i) {if (bt[i].nt== ' x ') | |
			bt[i].nt== ' B ') {if (bt[i].x<0) cout<< '-';
		else cout<<x[bt[i].x];
		else {cout<< '-';
	}} cout<<endl;
		Print middle line for (int i=cnt;i<bt_len;++i) {if (bt[i].type== ' m ') cout<< ' | ';
	else cout<< ';
	} cout<<endl; Print sequence y for (int i=cnt;i<bt_len;++i) {if (bt[i].nt== ' y ') | | Bt[i].nt== ' B ') {if (bt[i].y<0) cout<< '-';
		else cout<<y[bt[i].y];
		else {cout<< '-'; }} cout<<endl;

	*/delete [] BT;

	int score=s[m][n];
	for (int i=0;i<m+1;++i) Delete [] s[i];

	delete [] S;
return score; ////space efficient alignment, which caculate the length of the//shortest path from (0,0) to (I,J) and only can R
Eturns the optimal value. int spaceefficientalignment (int** S, char* X, int m, char* Y, int n) {for (int i=0;i<=m;++i) {S[i][0]=i*gap_cos
	T
		for (int j=1;j<=n;++j) {s[0][1]=j*gap_cost;
			for (int i=1;i<=m;++i) {int alpha;
			if (x[i-1]==y[j-1]) alpha=match_cost;

			else Alpha=mismatch_cost;
		S[i][1]=min (Alpha+s[i-1][0],gap_cost+s[i-1][1],gap_cost+s[i][0]);
	}//Move column 1 of ' to ' to ' 0 to ' room for next iteration for (int i=0;i<=m;++i) s[i][0]=s[i][1];
return s[m][1]; ////Backward space efficient alignment, which caculate the length of The//Shortest path from (i,j) to (m,n) and can returns the optimal value. int backwardspaceefficientalignment (int** S, char* X, int m, char* Y, int n) {for (int i=m;i>=0;--i) {s[i][1]= (
	M-i) *gap_cost;
		for (int j=n-1;j>=0;--j) {s[m][0]= (n-j) *gap_cost;
			for (int i=m-1;i>=0;--i) {int alpha;
			if (X[m-i-1]==y[j]) if (x[i]==y[j]) alpha=match_cost;

			else Alpha=mismatch_cost;
		S[i][0]=min (alpha+s[i+1][1],gap_cost+s[i+1][0],gap_cost+s[i][1]);
	}//Move column 0 of ' to ' to ' 1 to ' room for next iteration for (int i=0;i<=m;++i) s[i][1]=s[i][0];
return s[0][0]; ////Find out the Q this minimize the score F (Q,N/2) +g (Q,N/2)///Inputs://Sf-score vector of forward space EF ficient alignment//sb-score vector of backward space efficient alignment//m-length of the score vector//int Fi
	Ndminscore (int** sf,int** sb,int m) {int min=sf[1][1]+sb[1][0];
	int ret=1; for (int q=2;q<=m;++q) {if (min>sF[q][1]+sb[q][0]) {min=sf[q][1]+sb[q][0]; Ret=q-1;
The RET should is the position in the sequence and so minus 1} to return ret; }////Sequence alignemnt using divide and conquer//void Divideconqueralignment (char* X, region& Rx, char* Y, Regi
	on& ry) {int m=rx.end-rx.start+1;

	int n=ry.end-ry.start+1; if (m<=2 | | n<=2) {alignment (&x[rx.start],rx,&y[ry.start],ry);//cout<< "Call alignment" <<e Ndl
	TEST only return; } int** score_forward=new int*[m+1]; (m+1) x2 matrix to store scores of alignment int** Score_backward=new int*[m+1];
		(m+1) x2 matrix to store scores of alignment for (int i=0;i<m+1;++i) {score_forward[i]=new int[2];
	Score_backward[i]=new int[2];

	} memcnt+=sizeof (int) *2* (m+1) *2;
	Get the middle points in path Region Tempry;
	Tempry.start=ry.start;
	Tempry.end= (Ry.start+ry.end)/2;
	Spaceefficientalignment (score_forward,&x[rx.start],m,&y[tempry.start],tempry.end-tempry.start+1); Tempry. start= (Ry.start+ry.end)/2+1;
	Tempry.end=ry.end; Backwardspaceefficientalignment (Score_backward,&x[rx.start],m,&y[tempry.start],tempry.end-tempry.start

	+1);
int Q=findminscore (score_forward,score_backward,m) +rx.start; cout<< "q=" <<q<< ", n/2=" << (ry.start+ry.end)/2<<endl;

	TEST only Insertnodetop (q, (ry.start+ry.end)/2);
	Divide and conquer Region temprx;
	Temprx.start=rx.start;
	temprx.end=q;
	Tempry.start=ry.start;
	Tempry.end= (Ry.start+ry.end)/2;
	if (temprx.start<=temprx.end && tempry.start<=tempry.end) divideconqueralignment (X,temprx,Y,tempry);
	temprx.start=q+1;
	Temprx.end=rx.end;
	Tempry.start= (ry.start+ry.end)/2+1;
	Tempry.end=ry.end;

	if (temprx.start<=temprx.end && tempry.start<=tempry.end) divideconqueralignment (X,temprx,Y,tempry);
		for (int i=0;i<m+1;++i) {delete [] score_forward[i];
	Delete [] score_backward[i];
	} delete [] Score_forward;
delete [] score_backward; }////BackTraceand print the alignment//void BackTrace (char* x,char* Y) {merge_sort (p,0,pidx-1);//Sort nodes in path according to E
	Lement x//Adjust all nodes into path by element y int prev_x=-1,prev_y=-1;
	Btnode temp;
			for (int i=0;i<pidx;++i) {if (PREV_Y&GT;P[I].Y) {temp=p[i-1];
			P[i-1]=p[i];
		P[i]=temp;
	} prev_y=p[i].y; }//for (int i=0;i<pidx;++i)//cout<< ("<<P[i].x<<", "<<P[i].y<<") "<<"; TEST only//cout<<endl;
	TEST only//Print sequence X prev_x=-1; for (int i=0;i<pidx;++i) {if (p[i].x<0 | |
		p[i].x==prev_x) cout<< '-';
		else cout<<x[p[i].x];
	prev_x=p[i].x;

	} cout<<endl;
	Print Middle Line prev_x=-1;
	Prev_y=-1; for (int i=0;i<pidx;++i) {if (X[p[i].x]==y[p[i].y] && prev_x!=p[i].x && prev_y!=p[i].y) COUT&LT;&L
		t; ' | ';
		else cout<< ';
		prev_x=p[i].x;
	PREV_Y=P[I].Y;

	} cout<<endl;
	Print sequence Y prev_y=-1; for (int i=0;I<pidx;++i) {if (p[i].y<0 | |
		p[i].y==prev_y) cout<< '-';
		else cout<<y[p[i].y];
	PREV_Y=P[I].Y;

} cout<<endl;

	////main START here//int main (int argc, char** argv) {//Initialize random seed Srand (Time (NULL));	char* Output=null;		output file int seqlen=0;

	sequence length int c; while ((c = getopt (argc, argv, "N:o:"))!=-1} {switch (c) {case ' n ': Seqlen = atoi (opt 
             		ARG);
		Break 
             		Case ' o ': output = Optarg;
           	Break Case '? ': if (optopt = = ' n ') fprintf (stderr, "option-%c requires an argument.\n\n", Opto
             		PT);
             		else if (Isprint (optopt)) fprintf (stderr, "Unknown option '-%c '. \ n", optopt);
                        		else fprintf (stderr, "Unknown option character ' \\x%x '. \ n"),
             		OPTOPT);
         return 1;  	Default:abort ();
	} int M=seqlen;

	int N=seqlen-rand ()% (SEQLEN/5);
	char* Ref=new Char[m];
	char* Seq=new Char[n];
	
	Memcnt+=sizeof (Char) * (m+n);
	Generate random sequences ref=genrandseq (REF,M);

	Seq=genrandseq (Seq,n);
	P=new Btnode[m+n];
	
	Memcnt+=sizeof (Btnode) * (m+n); cout<< "seq 1:" <<ref<<endl; TEST only cout<< "seq 2:" <<seq<<endl;
	TEST only Region Rx, ry;
	rx.start=0;
	Rx.end=m-1;
	ry.start=0;
ry.end=n-1; Alignment (Ref,rx,seq,ry);

	TEST only Divideconqueralignment (Ref,rx,seq,ry);

	BackTrace (REF,SEQ);
	delete [] ref;
	delete [] seq;

	delete [] P;
	Record of the space costs FStream FS;
	Fs.open ("Memory_cost", Fstream::out|fstream::app);
	fs<<m<< "BP:" <<memcnt<<endl;

	Fs.close ();
return 0;
 }


Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.