C implementation of a kd-tree and kd-tree-based nearest-neighbor (kNN) search

Source: Internet
Author: User

C implementation of a kd-tree and kd-tree-based nearest-neighbor (kNN) search

For the principles behind kd-tree-based kNN, see the links at the end of this article.

The code below is adapted from other people's work. It is a C program that builds a kd-tree from a set of 2-D points and then searches the tree for the nearest neighbor of a query point.

 

Code:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

/* A 2-D data point. */
typedef struct {
    double x;
    double y;
} data_struct;

/* One node of the kd-tree. */
typedef struct kd_node {
    data_struct split_data;   // the data point stored at this node
    int split;                // splitting axis: 0 means x, anything else means y
    struct kd_node *left;     // points on the "<= split" side of this node
    struct kd_node *right;    // points on the ">= split" side of this node
} kd_struct;

/*
 * malloc() wrapper with an explicit out-of-memory policy: report the error
 * and terminate. Callers never pass a size of zero.
 */
static void *xmalloc(size_t bytes)
{
    void *mem = malloc(bytes);
    if (mem == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/* Returns the coordinate of `p` along `axis` (0 = x, otherwise y). */
static double coord_of(data_struct p, int axis)
{
    return axis == 0 ? p.x : p.y;
}

/*
 * qsort() comparator ordering points by x.
 * Returns <0, 0 or >0; equal keys must compare as 0 for qsort() to be
 * well defined.
 */
int cmp1(const void *a, const void *b)
{
    const data_struct *pa = a;
    const data_struct *pb = b;
    return (pa->x > pb->x) - (pa->x < pb->x);
}

/* qsort() comparator ordering points by y; same contract as cmp1(). */
int cmp2(const void *a, const void *b)
{
    const data_struct *pa = a;
    const data_struct *pb = b;
    return (pa->y > pb->y) - (pa->y < pb->y);
}

/*
 * Picks a splitting axis and the median point of `data_set` along that axis.
 *
 * The axis rotates on every call (call number modulo `dimension`) because the
 * signature carries no depth information. build_kdtree() does not use this
 * function: it derives the axis from the tree depth so that the result does
 * not depend on how many trees were built before.
 *
 * data_set    points to choose from (not modified)
 * size        number of points; nothing is written when size <= 0
 * dimension   number of axes to rotate through; nothing is written when <= 0
 * split       out: chosen axis
 * split_data  out: lower-median point along that axis
 */
void choose_split(data_struct data_set[], int size, int dimension,
                  int *split, data_struct *split_data)
{
    static unsigned int call_count = 0;

    if (size <= 0 || dimension <= 0)
        return;

    // Sort a scratch copy so the caller's array keeps its order.
    data_struct *sorted = xmalloc((size_t)size * sizeof *sorted);
    for (int i = 0; i < size; i++)
        sorted[i] = data_set[i];

    *split = (int)(call_count++ % (unsigned int)dimension);
    qsort(sorted, (size_t)size, sizeof *sorted, *split == 0 ? cmp1 : cmp2);
    *split_data = sorted[(size - 1) / 2];

    free(sorted);   // the scratch copy is only needed to find the median
}

/* Returns 1 when both coordinates of `a` and `b` are equal, 0 otherwise. */
int equal(data_struct a, data_struct b)
{
    return (a.x == b.x && a.y == b.y) ? 1 : 0;
}

/*
 * Builds the subtree for pts[0 .. count-1], reordering that range in place.
 * The axis is depth % dimension; the node stores the lower median along that
 * axis, everything sorted before it goes left and everything after it goes
 * right, so no point (including duplicates) is dropped.
 */
static kd_struct *build_range(data_struct *pts, int count, int depth,
                              int dimension)
{
    if (count <= 0)
        return NULL;

    int axis = depth % dimension;
    qsort(pts, (size_t)count, sizeof *pts, axis == 0 ? cmp1 : cmp2);

    int mid = (count - 1) / 2;
    kd_struct *node = xmalloc(sizeof *node);
    node->split_data = pts[mid];
    node->split = axis;
    node->left = build_range(pts, mid, depth + 1, dimension);
    node->right = build_range(pts + mid + 1, count - mid - 1, depth + 1,
                              dimension);
    return node;
}

/*
 * Builds a kd-tree from `data_set`.
 *
 * data_set   input points (not modified)
 * size       number of points
 * dimension  number of axes to cycle through (2 for this point type)
 * T          unused; kept so existing callers still compile
 *
 * Returns the root of a newly allocated tree, or NULL when there are no
 * points or `dimension` is not positive. Release it with free_kdtree().
 */
kd_struct *build_kdtree(data_struct data_set[], int size, int dimension,
                        kd_struct *T)
{
    (void)T;

    if (data_set == NULL || size <= 0 || dimension <= 0)
        return NULL;

    // One heap copy for the whole build instead of two stack arrays per level.
    data_struct *work = xmalloc((size_t)size * sizeof *work);
    for (int i = 0; i < size; i++)
        work[i] = data_set[i];

    kd_struct *root = build_range(work, size, 0, dimension);
    free(work);
    return root;
}

/* Frees every node of the tree rooted at `T`; a NULL tree is a no-op. */
void free_kdtree(kd_struct *T)
{
    if (T == NULL)
        return;
    free_kdtree(T->left);
    free_kdtree(T->right);
    free(T);
}

/* Returns the Euclidean distance between `a` and `b`. */
double compute_distance(data_struct a, data_struct b)
{
    return hypot(a.x - b.x, a.y - b.y);
}

/*
 * Recursive nearest-neighbor search below `node`.
 * `best_point` / `best_dist` hold the best candidate found so far and are
 * updated in place.
 */
static void nearest_in_subtree(const kd_struct *node, data_struct test,
                               data_struct *best_point, double *best_dist)
{
    if (node == NULL)
        return;

    double here = compute_distance(node->split_data, test);
    if (here < *best_dist) {
        *best_dist = here;
        *best_point = node->split_data;
    }

    // Signed offset of the query from the splitting line of this node.
    double offset = coord_of(test, node->split)
                  - coord_of(node->split_data, node->split);
    const kd_struct *near_side = offset <= 0 ? node->left : node->right;
    const kd_struct *far_side = offset <= 0 ? node->right : node->left;

    nearest_in_subtree(near_side, test, best_point, best_dist);

    // The far side can only hold a closer point if the circle of radius
    // best_dist around the query crosses the splitting line.
    if (fabs(offset) < *best_dist)
        nearest_in_subtree(far_side, test, best_point, best_dist);
}

/*
 * Finds the point in the tree closest to `test`.
 *
 * T              root of the tree; may be NULL
 * size           unused; kept so existing callers still compile
 * test           query point
 * nearest_point  out: closest stored point (left untouched for an empty tree)
 * distance       out: distance to that point, INFINITY for an empty tree
 */
void search_nearest(kd_struct *T, int size, data_struct test,
                    data_struct *nearest_point, double *distance)
{
    (void)size;

    if (T == NULL) {
        if (distance != NULL)
            *distance = INFINITY;
        return;
    }

    data_struct best_point = T->split_data;
    double best_dist = INFINITY;
    nearest_in_subtree(T, test, &best_point, &best_dist);

    if (nearest_point != NULL)
        *nearest_point = best_point;
    if (distance != NULL)
        *distance = best_dist;
}

int main(void)
{
    data_struct data_set[] = {
        {2, 3}, {5, 4}, {9, 6}, {4, 7}, {8, 1}, {7, 2}
    };
    int n = (int)(sizeof data_set / sizeof data_set[0]);   // number of points
    data_struct test = {7.1, 2.1};                         // query point
    data_struct nearest_point;
    double distance;

    kd_struct *root = build_kdtree(data_set, n, 2, NULL);

    search_nearest(root, n, test, &nearest_point, &distance);
    printf("nearest neighbor: (%.2f, %.2f)\ndistance: %.2f\n",
           nearest_point.x, nearest_point.y, distance);

    free_kdtree(root);
    return 0;
}

/*
 * Tree built from the data set above:
 *
 *   x            (5,4)
 *               /     \
 *   y       (2,3)     (7,2)
 *               \     /   \
 *   x         (4,7) (8,1) (9,6)
 */

 

References:

https://www.joinquant.com/post/2627?f=study&m=math

https://www.joinquant.com/post/2843?f=study&m=math

http://blog.csdn.net/zhl30041839/article/details/9277807

 

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.