For the original problem, see exercise 24 of Chapter 5 in TAOCP, Volume 3.
The problem: there is an ordered singly linked list. Each link is a tuple <A, B>, where A and B are elements (represented as strings in my solution) and B is the successor of A. The input is a file containing N-1 such tuples in random order (N is the number of elements actually in the linked list). The task is to output the original sequence described by the tuples. For example, if the input is <K4, K5> <K1, K2> <K3, K4> <K2, K3>, the linked list is rebuilt as <K1, K2> <K2, K3> <K3, K4> <K4, K5>, and the output is K1 K2 K3 K4 K5.
Code:
/************************* copyright @watofall no guarantee for bugfree!**************************/
/*
 * Rebuilds the order of a singly linked sequence from its successor pairs.
 *
 * Input  : "sort_pair2.in", whitespace-separated tokens read two at a time as
 *          <element, successor>.  N elements are described by N-1 pairs given
 *          in arbitrary order.
 * Output : the line "results :" followed by the elements in list order, each
 *          followed by a single space.
 *
 * Method : only sorting and sequential merging are used.  The tail element
 *          (the one that never appears as the first member of a pair) gets
 *          position N.  Then, for distance t = 1, 2, 4, ...:
 *            - every pair <a, b> whose b already has a known position p
 *              yields position p - t for a;
 *            - every pair <a, b> is joined with the pair <b, c> into <a, c>,
 *              which doubles the distance covered by each pair.
 *          t doubles each round and every round sorts the arrays, hence
 *          about lg(N) rounds of O(N lg N) work.
 */
#include <iostream>
#include <fstream>
#include "string.h"
#include <cstddef>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;

/* Legacy buffer limits.  Elements are now read into std::string, so nothing
 * below depends on them; they are kept only so existing references compile. */
#define MAX_CHAR 100
#define MAX_PAIR 100

/* One link: `next` is the element found some fixed distance after `val`. */
struct Pair
{
    std::string val;
    std::string next;
};

/* An element whose 1-based position in the final sequence is known. */
struct NumVal
{
    int sequence;
    std::string val;
};

/* Orders pairs by their first member. */
bool comp1(const Pair &a, const Pair &b)
{
    return a.val < b.val;
}

/* Orders pairs by their second member. */
bool comp2(const Pair &a, const Pair &b)
{
    return a.next < b.next;
}

/* Orders positioned elements by name. */
bool comp3(const NumVal &a, const NumVal &b)
{
    return a.val < b.val;
}

/* Orders positioned elements by position. */
bool comp4(const NumVal &a, const NumVal &b)
{
    return a.sequence < b.sequence;
}

/* Prints a diagnostic on stderr and returns the process exit code to use. */
static int report_error(const char *message)
{
    std::cerr << "error: " << message << std::endl;
    return 1;
}

int main()
{
    std::ifstream fin("sort_pair2.in");
    if (!fin)
        return report_error("cannot open sort_pair2.in");

    // Read the pairs.  Looping on the extraction itself (instead of on
    // fin.good()) means trailing whitespace at the end of the file does not
    // produce a bogus extra pair.
    std::vector<Pair> pairs;
    Pair input;
    while (fin >> input.val)
    {
        if (!(fin >> input.next))
            return report_error("odd number of tokens: last element has no successor");
        pairs.push_back(input);
    }

    // N-1 pairs describe N elements.
    const int count = static_cast<int>(pairs.size()) + 1;

    std::vector<Pair> by_next(pairs);
    std::sort(pairs.begin(), pairs.end(), comp1);
    std::sort(by_next.begin(), by_next.end(), comp2);

    // Find the tail: a successor that never occurs as the first member of a
    // pair.  Both arrays are sorted, so one forward merge is enough.
    std::vector<NumVal> num_vals;
    for (std::size_t i = 0, j = 0; j < by_next.size(); ++j)
    {
        const std::string &target = by_next[j].next;
        while (i < pairs.size() && pairs[i].val < target)
            ++i;
        if (i < pairs.size() && pairs[i].val == target)
        {
            ++i;
            continue;
        }
        NumVal tail;
        tail.sequence = count;
        tail.val = target;
        num_vals.push_back(tail);
    }

    // Doubling rounds.  On entry to the round with distance t, every pair
    // links an element to the one t steps after it.
    for (int t = 1; t < count; t *= 2)
    {
        by_next = pairs;
        std::sort(pairs.begin(), pairs.end(), comp1);
        std::sort(by_next.begin(), by_next.end(), comp2);
        std::sort(num_vals.begin(), num_vals.end(), comp3);

        std::vector<Pair> joined;             // pairs at distance 2t
        std::vector<NumVal> known(num_vals);  // positions known after this round

        std::size_t j = 0;
        std::size_t k = 0;
        for (std::size_t i = 0; i < by_next.size(); ++i)
        {
            const std::string &target = by_next[i].next;
            while (j < pairs.size() && pairs[j].val < target)
                ++j;
            while (k < num_vals.size() && num_vals[k].val < target)
                ++k;

            if (j < pairs.size() && pairs[j].val == target)
            {
                // <a, b> + <b, c>  ->  <a, c>
                Pair link;
                link.val = by_next[i].val;
                link.next = pairs[j].next;
                joined.push_back(link);
                ++j;
            }
            else if (k < num_vals.size() && num_vals[k].val == target)
            {
                // b sits at a known position, so a sits t places before it.
                NumVal placed;
                placed.sequence = num_vals[k].sequence - t;
                placed.val = by_next[i].val;
                known.push_back(placed);
                ++k;
            }
            else
            {
                // The original loop would spin forever here.
                return report_error("input does not describe a single chain");
            }
        }

        pairs.swap(joined);
        num_vals.swap(known);
    }

    // With at least one pair, a well-formed chain places every element once.
    if (count > 1 && num_vals.size() != static_cast<std::size_t>(count))
        return report_error("input does not describe a single chain");

    std::sort(num_vals.begin(), num_vals.end(), comp4);
    std::cout << "results :" << std::endl;
    for (std::size_t i = 0; i < num_vals.size(); ++i)
    {
        std::cout << num_vals[i].val << " ";
    }
    return 0;
}
Sample input:
E F
F j
H I
I u
B o
W x
X Y
Y Z
Z C
C d
U V
V W
S a
Q r
R S
O p
P T
T g
G h
A B
D e
J K
K L
L m
M n
Results:
Q r s a B o p t g h I u v w x y z C D E f j k l m n
The main idea of the algorithm is to use sorting to quickly match equal elements across two sets. It maintains three arrays F, H and G. F holds the tuples that remain to be processed, and H holds the same tuples as F. F is sorted in ascending order of the second element of each tuple (lexicographic order, which has nothing to do with the output order; it only makes matching easy), and H is sorted in ascending order of the first element. G holds the elements at the end of the final sequence whose positions are already known, sorted lexicographically. The span t of the tuples in H keeps growing (for a tuple <A, B>, t is the number of steps from A to B in the result sequence) and is doubled in every round, so the outer loop runs lg(N) times. Each round is dominated by sorting, which costs O(N lg N), so the overall time complexity is O(N (lg N)^2).