For explanation to linear spaces sequence alignment, please refer to http://ai.stanford.edu/~serafim/CS262_2007/notes/ Lecture3.pdf.
The algorithm and equation I used is from the textbook algorithm design by Jon Kleinberg Andéva Tardos (2005):
The algorithm of Backward-space-efficient-alignment () is the reverse of space-efficient-alignment ().
The time cost is sequences from 10bp-510bp.
Memory cost of sequences from 10bp-510bp.
#include <iostream> #include <fstream> #include <cstring> #include <cmath> #include <time.h&
Gt #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include <unistd.h> using namespace St
D
const int match_cost=0;
const int mismatch_cost=2;
const int gap_cost=1; int memcnt=0; Counting memory allocated struct Btnode {int x;//x axis int y;//Y axis char type;//' M '-match, ' s '-mismatch, ' g '-gap char nt;
Valid neucleotide:x-sequence x, y-sequence y, b-both};
btnode* P; int pidx=0;
Index of P struct Region//Region in a sequence {int start;
int end;
};
Generate random sequence//char* genrandseq (char* seq, int n) {string nt= "ATCG";
for (int i=0;i<n;++i) {int idx=rand ()%4;
SEQ[I]=NT[IDX];
return to SEQ; int min (int a,int b,int c) {return (A<B?A:B) <c? (
A<B?A:B): C;
void Merge (btnode* input, long p, long R) {Long mid = floor (P + r)/2);
Long i1 = 0;
Long i2 = p; LonG i3 = mid + 1;
Temp array btnode* temp=new btnode[r-p+1];
Memcnt+=sizeof (Btnode) * (r-p+1);
Merge in sorted form the 2 arrays while (I2 <= mid && i3 <= R) if (Input[i2].x < input[i3].x)
temp[i1++] = input[i2++];
else temp[i1++] = input[i3++];
Merge the remaining elements in left array while (I2 <= mid) temp[i1++] = input[i2++];
Merge the remaining elements in right array while (i3 <= r) temp[i1++] = input[i3++];
Move from temp array to master array for (int i = p; I <= R i++) input[i] = temp[i-p];
delete [] temp; }//Merge sort the Btnode array by the The (element x)////Inputs://p-the start index of array input//R- The end index of the array input void Merge_sort (btnode* input, long p, long R) {if (P < r) {Long mid = floor (P +
R)/2);
Merge_sort (Input, p, mid);
Merge_sort (Input, mid + 1, R);
Merge (Input, p, R); }////Insert new node into P//void Insertnodetop (iNT x, int y) {bool isok=true;
for (int i=0;i<pidx;++i) {if (x==p[i].x && y==p[i].y) Isok=false;
} if (isOK) {p[pidx].x=x;
P[pidx].y=y;
pidx++;
}////Align seqences////Recurrence:/OPT (I,J) =min{//Alpha (I,J) +opt (i-1,j-1),//Delta+opt (I-1,J),
Delta+opt (I,J-1)//}/Where mismatch cost alpha=2, match cost alpha=0, and Gap cost delta=1. Parameters://x,y-sequences//M-length of sequence X//N-length of sequence Y//int Alignment (char* X, Reg
ion& Rx, char* Y, region& ry) {int m=rx.end-rx.start+1;
int n=ry.end-ry.start+1; int** S=new int*[m+1];
Temp array to store scores of alignment for (int i=0;i<m+1;++i) s[i]=new int[n+1];
memcnt+=sizeof (int) * (m+1) * (n+1);
for (int i=0;i<=m;++i) s[i][0]=i*gap_cost;
for (int j=0;j<=n;++j) s[0][j]=j*gap_cost;
for (int j=1;j<=n;++j) {for (int i=1;i<=m;++i) {int alpha;
if (x[i-1]==y[j-1]) alpha=match_cost; else Alpha=miSmatch_cost;
S[i][j]=min (Alpha+s[i-1][j-1],gap_cost+s[i-1][j],gap_cost+s[i][j-1]);
}//End of A for//Trace back int ix=m;
int iy=n;
int bt_len=m+n;//length of array to record tracing back int cnt=bt_len-1; btnode* Bt=new Btnode[bt_len];
Record BackTrace path memcnt+=sizeof (btnode) *bt_len;
while (ix>=1 && iy>=1) {bt[cnt].x=ix-1;
Bt[cnt].y=iy-1;
Insertnodetop (rx.start+ix-1,ry.start+iy-1);
if (S[ix][iy]==s[ix-1][iy-1]+match_cost && x[ix-1]==y[iy-1])//MATCH {bt[cnt].type= ' m ';
Bt[cnt].nt= ' B ';
ix--;
iy--;
else if (s[ix][iy]==s[ix-1][iy-1]+mismatch_cost) {//mismatch bt[cnt].type= ' S ';
Bt[cnt].nt= ' B ';
ix--;
iy--;
else if (s[ix][iy]==s[ix-1][iy]+gap_cost) {//GAP bt[cnt].type= ' g ';
bt[cnt].nt= ' x ';
ix--;
else if (s[ix][iy]==s[ix][iy-1]+gap_cost) {//GAP bt[cnt].type= ' g ';
Bt[cnt].nt= ' Y ';
iy--;
} cnt--;
while (iy>0) {bt[cnt].x=ix-1;
Bt[cnt].y=iy-1; InsertnodetoP (rx.start+ix-1,ry.start+iy-1);
if (s[ix][iy]==s[ix][iy-1]+gap_cost) {//GAP bt[cnt].type= ' g ';
Bt[cnt].nt= ' Y ';
iy--;
} cnt--;
while (ix>0) {bt[cnt].x=ix-1;
Bt[cnt].y=iy-1;
Insertnodetop (rx.start+ix-1,ry.start+iy-1);
if (s[ix][iy]==s[ix-1][iy]+gap_cost) {//GAP bt[cnt].type= ' g ';
bt[cnt].nt= ' x ';
ix--;
} cnt--;
/*//Print the alignment cnt++;
TEST for (int i=cnt;i<bt_len;++i) {cout<< "(" <<bt[i].x<< "," <<bt[i].y<< ")";
} cout<<endl; Print sequence x for (int i=cnt;i<bt_len;++i) {if (bt[i].nt== ' x ') | |
bt[i].nt== ' B ') {if (bt[i].x<0) cout<< '-';
else cout<<x[bt[i].x];
else {cout<< '-';
}} cout<<endl;
Print middle line for (int i=cnt;i<bt_len;++i) {if (bt[i].type== ' m ') cout<< ' | ';
else cout<< ';
} cout<<endl; Print sequence y for (int i=cnt;i<bt_len;++i) {if (bt[i].nt== ' y ') | | Bt[i].nt== ' B ') {if (bt[i].y<0) cout<< '-';
else cout<<y[bt[i].y];
else {cout<< '-'; }} cout<<endl;
*/delete [] BT;
int score=s[m][n];
for (int i=0;i<m+1;++i) Delete [] s[i];
delete [] S;
return score; ////space efficient alignment, which caculate the length of the//shortest path from (0,0) to (I,J) and only can R
Eturns the optimal value. int spaceefficientalignment (int** S, char* X, int m, char* Y, int n) {for (int i=0;i<=m;++i) {S[i][0]=i*gap_cos
T
for (int j=1;j<=n;++j) {s[0][1]=j*gap_cost;
for (int i=1;i<=m;++i) {int alpha;
if (x[i-1]==y[j-1]) alpha=match_cost;
else Alpha=mismatch_cost;
S[i][1]=min (Alpha+s[i-1][0],gap_cost+s[i-1][1],gap_cost+s[i][0]);
}//Move column 1 of ' to ' to ' 0 to ' room for next iteration for (int i=0;i<=m;++i) s[i][0]=s[i][1];
return s[m][1]; ////Backward space efficient alignment, which caculate the length of The//Shortest path from (i,j) to (m,n) and can returns the optimal value. int backwardspaceefficientalignment (int** S, char* X, int m, char* Y, int n) {for (int i=m;i>=0;--i) {s[i][1]= (
M-i) *gap_cost;
for (int j=n-1;j>=0;--j) {s[m][0]= (n-j) *gap_cost;
for (int i=m-1;i>=0;--i) {int alpha;
if (X[m-i-1]==y[j]) if (x[i]==y[j]) alpha=match_cost;
else Alpha=mismatch_cost;
S[i][0]=min (alpha+s[i+1][1],gap_cost+s[i+1][0],gap_cost+s[i][1]);
}//Move column 0 of ' to ' to ' 1 to ' room for next iteration for (int i=0;i<=m;++i) s[i][1]=s[i][0];
return s[0][0]; ////Find out the Q this minimize the score F (Q,N/2) +g (Q,N/2)///Inputs://Sf-score vector of forward space EF ficient alignment//sb-score vector of backward space efficient alignment//m-length of the score vector//int Fi
Ndminscore (int** sf,int** sb,int m) {int min=sf[1][1]+sb[1][0];
int ret=1; for (int q=2;q<=m;++q) {if (min>sF[q][1]+sb[q][0]) {min=sf[q][1]+sb[q][0]; Ret=q-1;
The RET should is the position in the sequence and so minus 1} to return ret; }////Sequence alignemnt using divide and conquer//void Divideconqueralignment (char* X, region& Rx, char* Y, Regi
on& ry) {int m=rx.end-rx.start+1;
int n=ry.end-ry.start+1; if (m<=2 | | n<=2) {alignment (&x[rx.start],rx,&y[ry.start],ry);//cout<< "Call alignment" <<e Ndl
TEST only return; } int** score_forward=new int*[m+1]; (m+1) x2 matrix to store scores of alignment int** Score_backward=new int*[m+1];
(m+1) x2 matrix to store scores of alignment for (int i=0;i<m+1;++i) {score_forward[i]=new int[2];
Score_backward[i]=new int[2];
} memcnt+=sizeof (int) *2* (m+1) *2;
Get the middle points in path Region Tempry;
Tempry.start=ry.start;
Tempry.end= (Ry.start+ry.end)/2;
Spaceefficientalignment (score_forward,&x[rx.start],m,&y[tempry.start],tempry.end-tempry.start+1); Tempry. start= (Ry.start+ry.end)/2+1;
Tempry.end=ry.end; Backwardspaceefficientalignment (Score_backward,&x[rx.start],m,&y[tempry.start],tempry.end-tempry.start
+1);
int Q=findminscore (score_forward,score_backward,m) +rx.start; cout<< "q=" <<q<< ", n/2=" << (ry.start+ry.end)/2<<endl;
TEST only Insertnodetop (q, (ry.start+ry.end)/2);
Divide and conquer Region temprx;
Temprx.start=rx.start;
temprx.end=q;
Tempry.start=ry.start;
Tempry.end= (Ry.start+ry.end)/2;
if (temprx.start<=temprx.end && tempry.start<=tempry.end) divideconqueralignment (X,temprx,Y,tempry);
temprx.start=q+1;
Temprx.end=rx.end;
Tempry.start= (ry.start+ry.end)/2+1;
Tempry.end=ry.end;
if (temprx.start<=temprx.end && tempry.start<=tempry.end) divideconqueralignment (X,temprx,Y,tempry);
for (int i=0;i<m+1;++i) {delete [] score_forward[i];
Delete [] score_backward[i];
} delete [] Score_forward;
delete [] score_backward; }////BackTraceand print the alignment//void BackTrace (char* x,char* Y) {merge_sort (p,0,pidx-1);//Sort nodes in path according to E
Lement x//Adjust all nodes into path by element y int prev_x=-1,prev_y=-1;
Btnode temp;
for (int i=0;i<pidx;++i) {if (PREV_Y>P[I].Y) {temp=p[i-1];
P[i-1]=p[i];
P[i]=temp;
} prev_y=p[i].y; }//for (int i=0;i<pidx;++i)//cout<< ("<<P[i].x<<", "<<P[i].y<<") "<<"; TEST only//cout<<endl;
TEST only//Print sequence X prev_x=-1; for (int i=0;i<pidx;++i) {if (p[i].x<0 | |
p[i].x==prev_x) cout<< '-';
else cout<<x[p[i].x];
prev_x=p[i].x;
} cout<<endl;
Print Middle Line prev_x=-1;
Prev_y=-1; for (int i=0;i<pidx;++i) {if (X[p[i].x]==y[p[i].y] && prev_x!=p[i].x && prev_y!=p[i].y) COUT<&L
t; ' | ';
else cout<< ';
prev_x=p[i].x;
PREV_Y=P[I].Y;
} cout<<endl;
Print sequence Y prev_y=-1; for (int i=0;I<pidx;++i) {if (p[i].y<0 | |
p[i].y==prev_y) cout<< '-';
else cout<<y[p[i].y];
PREV_Y=P[I].Y;
} cout<<endl;
////main START here//int main (int argc, char** argv) {//Initialize random seed Srand (Time (NULL)); char* Output=null; output file int seqlen=0;
sequence length int c; while ((c = getopt (argc, argv, "N:o:"))!=-1} {switch (c) {case ' n ': Seqlen = atoi (opt
ARG);
Break
Case ' o ': output = Optarg;
Break Case '? ': if (optopt = = ' n ') fprintf (stderr, "option-%c requires an argument.\n\n", Opto
PT);
else if (Isprint (optopt)) fprintf (stderr, "Unknown option '-%c '. \ n", optopt);
else fprintf (stderr, "Unknown option character ' \\x%x '. \ n"),
OPTOPT);
return 1; Default:abort ();
} int M=seqlen;
int N=seqlen-rand ()% (SEQLEN/5);
char* Ref=new Char[m];
char* Seq=new Char[n];
Memcnt+=sizeof (Char) * (m+n);
Generate random sequences ref=genrandseq (REF,M);
Seq=genrandseq (Seq,n);
P=new Btnode[m+n];
Memcnt+=sizeof (Btnode) * (m+n); cout<< "seq 1:" <<ref<<endl; TEST only cout<< "seq 2:" <<seq<<endl;
TEST only Region Rx, ry;
rx.start=0;
Rx.end=m-1;
ry.start=0;
ry.end=n-1; Alignment (Ref,rx,seq,ry);
TEST only Divideconqueralignment (Ref,rx,seq,ry);
BackTrace (REF,SEQ);
delete [] ref;
delete [] seq;
delete [] P;
Record of the space costs FStream FS;
Fs.open ("Memory_cost", Fstream::out|fstream::app);
fs<<m<< "BP:" <<memcnt<<endl;
Fs.close ();
return 0;
}