Poj 4092: Jeans
Question:
Description
The genetic geographic program is a collaborative research project between IBM and the National Geographic Society of America to analyze DNA from hundreds of thousands of donors to study the map of human migration on Earth.
As an IBM investigator, you are asked to write a program to discover common DNA fragments and associate them with personal survey information to identify new genetic markers.
A DNA base sequence is recorded by listing, in order, the nitrogen-containing bases found in the molecule. There are four kinds of bases: adenine (A), thymine (T), guanine (G), and cytosine (C). For example, a 6-base DNA sequence can be written as TAGACC.
Given the base sequence of a set of DNA, determine the longest series of bases occurring in all sequences.
Input
The input starts with an integer N as a separate row, indicating the number of datasets. Each dataset includes the following components:
A positive integer m (2 <= m <= 10) indicates the number of base sequences in the dataset.
M rows. Each row contains a sequence containing 60 bases.
Output
For each dataset, output the longest contiguous base subsequence (substring) that occurs in all of the dataset's base sequences. If the longest common subsequence is shorter than three bases, output "no significant commonalities" instead. If several common subsequences share the same longest length, output only the one that comes first in alphabetical order.
Sample Input
32GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA3GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATAGATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAAGATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA3CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
Sample output
no significant commonalities
AGATAC
CATCATCAT
Solution: Let's first look at three subproblems. 1) How do we find all the common substrings of two strings? Use dynamic programming: build a table, then scan the table to collect every common substring. 2) How do we sort a set of strings? Use the STL function stable_sort(first, last). 3) How do we compute the intersection of two ordered sets? Use the merge idea: walk both sorted sets in step and keep the elements that appear in both. Now consider the whole problem for one dataset. Pair the strings up arbitrarily and find all the common substrings of each pair; if the number of strings is odd, pair the last string with any other string. Sort the common substrings of each pair, then intersect the sorted sets of all the pairs, combining them step by step in the same way until the substrings common to every pair (and therefore to every string) are obtained. Finally, pick the longest of these; if there are several of that length, output the first one in sorted order.
Lab data
32GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA3GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATAGATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAAGATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA3CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
Lab results
Code
// POJ 4092: print, for every dataset, the longest substring shared by all of
// its DNA sequences (ties broken alphabetically), or a fixed message when the
// longest shared substring has fewer than three bases.
#include <algorithm>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <set>
#include <string>
#include <vector>

// Length of every DNA sequence according to the problem statement.
int const elem_length = 60;
// Shortest common substring that counts as "significant".
int const min_common_length = 3;
// Buckets are indexed by substring length, so lengths 0..elem_length need
// elem_length + 1 slots (two identical sequences share a substring of the
// full length).
int const bucket_count = elem_length + 1;

// One bucket: the sorted, duplicate-free common substrings of one length.
typedef std::list<std::string>* ELEM;
// One table: bucket_count buckets, indexed by substring length.
typedef ELEM* Table;

// Build the per-length buckets of common substrings from a filled DP table.
void get_table(int** data, int row, int column, Table& table, std::string a);
// Fill the longest-common-suffix DP table for two strings.
void common_substring(std::string a, std::string b, int**& data, int& row, int& column);
// Read every dataset and solve it.
void read_data();
// Solve one dataset of m sequences and print its answer.
void main_solution(std::string* data, int m);
// Intersection of two sorted string lists.
std::list<std::string> common_two_list(std::list<std::string>* list1, std::list<std::string>* list2);

// Release a DP table allocated by common_substring.
static void free_dp_table(int** data, int row) {
    if (data == 0) {
        return;
    }
    for (int i = 0; i < row; ++i) {
        delete[] data[i];
    }
    delete[] data;
}

// Release a bucket table allocated by get_table.
static void free_table(Table table) {
    if (table == 0) {
        return;
    }
    for (int len = 0; len < bucket_count; ++len) {
        delete table[len];
    }
    delete[] table;
}

// Compute the bucket table holding every common substring of `first` and
// `second`; the intermediate DP table is released before returning.
static void build_pair_table(const std::string& first, const std::string& second, Table& table) {
    int** dp_table = 0;
    int row = 0;
    int column = 0;
    common_substring(first, second, dp_table, row, column);
    get_table(dp_table, row, column, table, first);
    free_dp_table(dp_table, row);
}

// Fill `data` so that data[i][j] is the length of the longest common suffix of
// a[0..i) and b[0..j).
//
// On return `row` is a.length() + 1 and `column` is b.length() + 1. The table
// is allocated with new[]; the caller owns it.
void common_substring(std::string a, std::string b, int**& data, int& row, int& column) {
    row = static_cast<int>(a.length()) + 1;
    column = static_cast<int>(b.length()) + 1;

    data = new int*[row];
    for (int i = 0; i < row; ++i) {
        // Value-initialisation zeroes the row, which also covers the
        // row-0 / column-0 border of the recurrence.
        data[i] = new int[column]();
    }

    for (int i = 1; i < row; ++i) {
        for (int j = 1; j < column; ++j) {
            data[i][j] = (a[i - 1] == b[j - 1]) ? data[i - 1][j - 1] + 1 : 0;
        }
    }
}

// Turn a DP table produced by common_substring into per-length buckets.
//
// `a` must be the first string that was passed to common_substring. On return
// table[len], for min_common_length <= len <= elem_length, holds every
// distinct common substring of that length in ascending order; shorter
// buckets are left empty. The table is allocated with new; the caller owns it.
void get_table(int** data, int row, int column, Table& table, std::string a) {
    table = new ELEM[bucket_count];
    for (int len = 0; len < bucket_count; ++len) {
        table[len] = new std::list<std::string>;
    }

    // Seed each bucket with the common suffix that ends at every DP cell.
    for (int i = min_common_length; i < row; ++i) {
        for (int j = min_common_length; j < column; ++j) {
            // Clamp so an over-long input can never index past the last bucket.
            int const len = std::min(data[i][j], elem_length);
            if (len >= min_common_length) {
                table[len]->push_back(a.substr(i - len, len));
            }
        }
    }

    // Every substring of a common substring is itself common: walking from
    // the longest bucket down, add each string of bucket len + 1 minus its
    // last character and minus its first character to bucket len. The set
    // removes duplicates and yields the strings in sorted order.
    for (int len = bucket_count - 1; len >= min_common_length; --len) {
        std::set<std::string> unique(table[len]->begin(), table[len]->end());
        if (len + 1 < bucket_count) {
            std::list<std::string>* const longer = table[len + 1];
            for (std::list<std::string>::const_iterator it = longer->begin(); it != longer->end(); ++it) {
                unique.insert(it->substr(0, len));
                unique.insert(it->substr(1, len));
            }
        }
        table[len]->assign(unique.begin(), unique.end());
    }
}

// Return the strings present in both lists.
//
// Both lists must be sorted ascending; the result is sorted as well. A null
// pointer is treated as an empty list.
std::list<std::string> common_two_list(std::list<std::string>* list1, std::list<std::string>* list2) {
    std::list<std::string> result;
    if (list1 == 0 || list2 == 0) {
        return result;
    }

    std::list<std::string>::const_iterator it1 = list1->begin();
    std::list<std::string>::const_iterator it2 = list2->begin();
    while (it1 != list1->end() && it2 != list2->end()) {
        int const order = it1->compare(*it2);
        if (order < 0) {
            ++it1;
        } else if (order > 0) {
            ++it2;
        } else {
            result.push_back(*it1);
            ++it1;
            ++it2;
        }
    }
    return result;
}

// Solve one dataset: print the longest substring common to data[0..m), taking
// the alphabetically first one on ties, or "no significant commonalities"
// when no common substring of at least min_common_length bases exists.
void main_solution(std::string* data, int m) {
    if (data == 0 || m <= 0) {
        std::cout << "no significant commonalities" << '\n';
        return;
    }

    // The sequences are processed in pairs; with an odd count the last
    // sequence is paired with the first one so that it is still covered.
    int const table_number = (m + 1) / 2;
    Table* tables = new Table[table_number];
    int pair_index = 0;
    for (int i = 0; i + 1 < m; i += 2) {
        build_pair_table(data[i], data[i + 1], tables[pair_index]);
        ++pair_index;
    }
    if (m % 2 == 1) {
        build_pair_table(data[0], data[m - 1], tables[table_number - 1]);
    }

    // A string is common to all sequences exactly when it is common to every
    // pair, so intersect the pairs' buckets, longest length first.
    bool found = false;
    std::string best;
    for (int len = elem_length; len >= min_common_length && !found; --len) {
        std::list<std::string> result = *tables[0][len];
        for (int t = 1; t < table_number && !result.empty(); ++t) {
            result = common_two_list(&result, tables[t][len]);
        }
        if (!result.empty()) {
            // Buckets are sorted, so the front is the alphabetically first.
            best = result.front();
            found = true;
        }
    }

    if (found) {
        std::cout << best << '\n';
    } else {
        std::cout << "no significant commonalities" << '\n';
    }

    for (int t = 0; t < table_number; ++t) {
        free_table(tables[t]);
    }
    delete[] tables;
}

// Read the dataset count, then for each dataset its sequence count and the
// sequences themselves, and solve each dataset as soon as it has been read.
//
// Input is taken from "data.txt" when that file can be opened, otherwise from
// standard input. Reading stops silently at the first malformed or truncated
// dataset.
void read_data() {
    std::ifstream file("data.txt");
    std::istream& in = file.is_open() ? static_cast<std::istream&>(file) : std::cin;

    int n = 0;
    if (!(in >> n)) {
        return;
    }
    for (; n > 0; --n) {
        int m = 0;
        if (!(in >> m) || m <= 0) {
            return;
        }
        std::vector<std::string> dna(m);
        for (int i = 0; i < m; ++i) {
            if (!(in >> dna[i])) {
                return;
            }
        }
        main_solution(&dna[0], m);
    }
}

int main() {
    read_data();
    return 0;
}
Poj 4092: Jeans