Analysis Process
First, let's look at the YOLO training command (the YOLO/Darknet source code is written in C):
./darknet detector train cfg/voc.data cfg/yolo-voc.cfg darknet19_448.conv.23
From this command we can see how the parameters land in argv[] of YOLO's main function: argv[0] -> darknet, argv[1] -> detector, argv[2] -> train, and so on for the rest. The main function of YOLO is in examples/darknet.c. Let's take a look at it:
int main(int argc, char **argv)
{
    //test_resize("data/bad.jpg");
    //test_box();
    //test_convolutional_layer();
    if(argc < 2){
        fprintf(stderr, "usage: %s <function>\n", argv[0]);
        return 0;
    }
    gpu_index = find_int_arg(argc, argv, "-i", 0);
    if(find_arg(argc, argv, "-nogpu")) {
        gpu_index = -1;
    }

#ifndef GPU
    gpu_index = -1;
#else
    if(gpu_index >= 0){
        cuda_set_device(gpu_index);
    }
#endif

    if (0 == strcmp(argv[1], "average")){ average(argc, argv); }
    else if (0 == strcmp(argv[1], "yolo")){ run_yolo(argc, argv); }
    else if (0 == strcmp(argv[1], "voxel")){ run_voxel(argc, argv); }
    else if (0 == strcmp(argv[1], "super")){ run_super(argc, argv); }
    else if (0 == strcmp(argv[1], "lsd")){ run_lsd(argc, argv); }
    else if (0 == strcmp(argv[1], "detector")){ run_detector(argc, argv); }
    else if (0 == strcmp(argv[1], "detect")){
        float thresh = find_float_arg(argc, argv, "-thresh", .24);
        char *filename = (argc > 4) ? argv[4]: 0;
        char *outfile = find_char_arg(argc, argv, "-out", 0);
        int fullscreen = find_arg(argc, argv, "-fullscreen");
        test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
    }
    else if (0 == strcmp(argv[1], "cifar")){ run_cifar(argc, argv); }
    else if (0 == strcmp(argv[1], "go")){ run_go(argc, argv); }
    else if (0 == strcmp(argv[1], "rnn")){ run_char_rnn(argc, argv); }
    else if (0 == strcmp(argv[1], "vid")){ run_vid_rnn(argc, argv); }
    else if (0 == strcmp(argv[1], "coco")){ run_coco(argc, argv); }
    else if (0 == strcmp(argv[1], "classify")){ predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); }
    else if (0 == strcmp(argv[1], "classifier")){ run_classifier(argc, argv); }
    else if (0 == strcmp(argv[1], "regressor")){ run_regressor(argc, argv); }
    else if (0 == strcmp(argv[1], "segmenter")){ run_segmenter(argc, argv); }
    else if (0 == strcmp(argv[1], "art")){ run_art(argc, argv); }
    else if (0 == strcmp(argv[1], "tag")){ run_tag(argc, argv); }
    else if (0 == strcmp(argv[1], "compare")){ run_compare(argc, argv); }
    else if (0 == strcmp(argv[1], "dice")){ run_dice(argc, argv); }
    else if (0 == strcmp(argv[1], "writing")){ run_writing(argc, argv); }
    else if (0 == strcmp(argv[1], "3d")){ composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0); }
    else if (0 == strcmp(argv[1], "test")){ test_resize(argv[2]); }
    else if (0 == strcmp(argv[1], "captcha")){ run_captcha(argc, argv); }
    else if (0 == strcmp(argv[1], "nightmare")){ run_nightmare(argc, argv); }
    else if (0 == strcmp(argv[1], "rgbgr")){ rgbgr_net(argv[2], argv[3], argv[4]); }
    else if (0 == strcmp(argv[1], "reset")){ reset_normalize_net(argv[2], argv[3], argv[4]); }
    else if (0 == strcmp(argv[1], "denormalize")){ denormalize_net(argv[2], argv[3], argv[4]); }
    else if (0 == strcmp(argv[1], "statistics")){ statistics_net(argv[2], argv[3]); }
    else if (0 == strcmp(argv[1], "normalize")){ normalize_net(argv[2], argv[3], argv[4]); }
    else if (0 == strcmp(argv[1], "rescale")){ rescale_net(argv[2], argv[3], argv[4]); }
    else if (0 == strcmp(argv[1], "ops")){ operations(argv[2]); }
    else if (0 == strcmp(argv[1], "speed")){ speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); }
    else if (0 == strcmp(argv[1], "oneoff")){ oneoff(argv[2], argv[3], argv[4]); }
    else if (0 == strcmp(argv[1], "oneoff2")){ oneoff2(argv[2], argv[3], argv[4], atoi(argv[5])); }
    else if (0 == strcmp(argv[1], "partial")){ partial(argv[2], argv[3], argv[4], atoi(argv[5])); }
    else if (0 == strcmp(argv[1], "average")){ average(argc, argv); }
    else if (0 == strcmp(argv[1], "visualize")){ visualize(argv[2], (argc > 3) ? argv[3] : 0); }
    else if (0 == strcmp(argv[1], "mkimg")){ mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]); }
    else if (0 == strcmp(argv[1], "imtest")){ test_resize(argv[2]); }
    else {
        fprintf(stderr, "Not an option: %s\n", argv[1]);
    }
    return 0;
}
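Before this dispatch, main() pulls optional flags such as -i and -nogpu out of argv with a few small helpers from the Darknet utilities (find_arg, find_int_arg, find_float_arg, find_char_arg). A minimal sketch of how such a helper behaves (simplified here for illustration; the real versions in src/utils.c additionally remove the consumed entries from argv):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins for Darknet's option helpers: scan argv for a flag and,
 * for value-taking flags, convert the token that follows it.                   */
static int find_arg_simple(int argc, char **argv, const char *arg)
{
    for(int i = 1; i < argc; ++i)
        if(argv[i] && 0 == strcmp(argv[i], arg)) return 1;   /* flag is present */
    return 0;
}

static int find_int_arg_simple(int argc, char **argv, const char *arg, int def)
{
    for(int i = 1; i < argc - 1; ++i)
        if(argv[i] && 0 == strcmp(argv[i], arg)) return atoi(argv[i + 1]);
    return def;                                              /* flag absent: use default */
}

int main(int argc, char **argv)
{
    /* Mirrors the first few lines of Darknet's main(). */
    int gpu_index = find_int_arg_simple(argc, argv, "-i", 0);
    if(find_arg_simple(argc, argv, "-nogpu")) gpu_index = -1;
    printf("gpu_index = %d\n", gpu_index);
    return 0;
}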
It is easy to see that main() is just a big dispatch on the argv[1] parameter: different routines are started depending on its value. Following our training command, argv[1] = detector, so the function called is run_detector, which is defined at the end of examples/detector.c. Let's take a look at this function:
void run_detector(int argc, char **argv)
{
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .24);
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    int avg = find_int_arg(argc, argv, "-avg", 3);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    char *outfile = find_char_arg(argc, argv, "-out", 0);
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus = calloc(ngpus, sizeof(int));
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            gpu_list = strchr(gpu_list, ',')+1;
        }
    } else {
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int clear = find_arg(argc, argv, "-clear");
    int fullscreen = find_arg(argc, argv, "-fullscreen");
    int width = find_int_arg(argc, argv, "-w", 0);
    int height = find_int_arg(argc, argv, "-h", 0);
    int fps = find_int_arg(argc, argv, "-fps", 0);

    char *datacfg = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) {
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 2);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
    }
}
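For our training command, the positional parameters read at the bottom of run_detector line up like this (the optional image filename, argv[6], is only used by the test branch):

argv[0] = ./darknet
argv[1] = detector
argv[2] = train
argv[3] = cfg/voc.data            (datacfg)
argv[4] = cfg/yolo-voc.cfg        (cfg)
argv[5] = darknet19_448.conv.23   (weights; here the pre-trained convolutional weights)
argv[6] = (not given)             (filename; only needed for the test branch)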
So the main role of run_detector is to dispatch to different functions based on the value of argv[2]; we can ignore the GPU and threshold options for now. test_detector, train_detector and the other handlers are defined in detector.c, and their names already tell us what they do. Still following the training command, argv[2] = train, so let's take a look at the train_detector function (note: I have modified part of it, so it is not exactly the original code):
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    list *options = read_data_cfg(datacfg);
    char *train_images = option_find_str(options, "train", "scripts/train.txt");   // path of the training image list
    char *backup_directory = option_find_str(options, "backup", "/backup/");       // path for backing up the training results
    srand(time(0));
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
    network *nets = calloc(ngpus, sizeof(network));

    srand(time(0));
    int seed = rand();
    int i;
    for(i = 0; i < ngpus; ++i){
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = load_network(cfgfile, weightfile, clear);   // load the network
        nets[i].learning_rate *= ngpus;
    }
    srand(time(0));
    network net = nets[0];

    int imgs = net.batch * net.subdivisions * ngpus;
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    data train, buffer;

    layer l = net.layers[net.n - 1];

    int classes = l.classes;
    float jitter = l.jitter;

    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.classes = classes;
    args.jitter = jitter;
    args.num_boxes = l.max_boxes;
    args.d = &buffer;
    args.type = DETECTION_DATA;
    args.threads = 8;

    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    pthread_t load_thread = load_data(args);
    clock_t time;
    int count = 0;
    //while(i*imgs < N*120){
    while(get_current_batch(net) < net.max_batches){
        if(l.random && count++ % 10 == 0){
            printf("Resizing\n");
            int dim = (rand() % 10 + 10) * 32;
            if (get_current_batch(net)+200 > net.max_batches) dim = 608;
            //int dim = (rand() % 4 + 16) * 32;
            printf("%d\n", dim);
            args.w = dim;
            args.h = dim;

            pthread_join(load_thread, 0);
            train = buffer;
            free_data(train);
            load_thread = load_data(args);

            for(i = 0; i < ngpus; ++i){
                resize_network(nets + i, dim, dim);
            }
            net = nets[0];
        }
        time = clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);

        /*
        int k;
        for(k = 0; k < l.max_boxes; ++k){
            box b = float_to_box(train.y.vals[10] + 1 + k*5);
            if(!b.x) break;
            printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
        }
        */
        /*
        int zz;
        for(zz = 0; zz < train.X.cols; ++zz){
            image im = float_to_image(net.w, net.h, 3, train.X.vals[zz]);
            int k;
            for(k = 0; k < l.max_boxes; ++k){
                box b = float_to_box(train.y.vals[zz] + k*5);
                printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
                draw_bbox(im, b, 1, 1, 0, 0);
            }
            show_image(im, "truth11");
            cvWaitKey(0);
            save_image(im, "truth11");
        }
        */

        printf("Loaded: %lf seconds\n", sec(clock()-time));

        time = clock();
        float loss = 0;
#ifdef GPU
        if(ngpus == 1){
            loss = train_network(net, train);
        } else {
            loss = train_networks(nets, ngpus, train, 4);
        }
#else
        loss = train_network(net, train);
#endif
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        i = get_current_batch(net);
        printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
        if(i%1000==0){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
        if(i%10000==0 || (i < 1000 && i%100 == 0)){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        free_data(train);
    }
#ifdef GPU
    if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}
In train_detector we mainly pay attention to the following: the read_data_cfg call, the train_images and backup_directory options, and the load_network call.
The datacfg parameter passed to read_data_cfg, as we saw in run_detector, is argv[3]; in our example it corresponds to cfg/voc.data.
train_images specifies the path of the list of images to be used for training.
backup_directory specifies the path where the trained weights are backed up.
load_network loads the network structure and the parameters to be trained. From run_detector we can see that its cfgfile argument is argv[4], which in our example is cfg/yolo-voc.cfg.
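For orientation, load_network essentially parses the cfg into a network and then loads the weight file on top of it if one was given. A rough, simplified sketch of that behaviour (not the exact code, and the signature differs slightly between Darknet revisions):

network load_network(char *cfg, char *weights, int clear)
{
    network net = parse_network_cfg(cfg);    /* build the layers described by yolo-voc.cfg */
    if(weights && weights[0] != 0){
        load_weights(&net, weights);         /* e.g. darknet19_448.conv.23                 */
    }
    if(clear) *net.seen = 0;                 /* -clear restarts the batch counter          */
    return net;
}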
Now let's take a look at cfg/voc.data (note: I modified it, so it is not the original file):
classes = 2
train = /home/iair339-04/darknet/scripts/train.txt
valid = /home/iair339-04/darknet/scripts/2007_test.txt
names = data/kitti.names
backup = backup
Here we can see that voc.data specifies the number of classes (classes), the training set list (train), the test set list (valid), the class names file (names) and the backup path for the trained weights (backup). So easy.
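For completeness, read_data_cfg simply parses a file like this into a list of key = value options, and option_find_str / option_find_int look a key up and fall back to a default when it is missing. A minimal sketch of that idea (a simplified stand-in, not the original Darknet option-list code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-in for Darknet's option list: voc.data is just "key = value" lines. */
typedef struct { char key[64]; char val[256]; } kvp;

static int read_data_cfg_simple(const char *filename, kvp *opts, int max)
{
    FILE *fp = fopen(filename, "r");
    if(!fp) return 0;
    char line[512];
    int n = 0;
    while(n < max && fgets(line, sizeof(line), fp)){
        char *eq = strchr(line, '=');
        if(!eq || line[0] == '#') continue;          /* skip comments / malformed lines */
        *eq = 0;
        sscanf(line, " %63s", opts[n].key);          /* trim whitespace around the key  */
        sscanf(eq + 1, " %255s", opts[n].val);       /* ...and around the value         */
        ++n;
    }
    fclose(fp);
    return n;
}

static const char *option_find_str_simple(kvp *opts, int n, const char *key, const char *def)
{
    for(int i = 0; i < n; ++i)
        if(0 == strcmp(opts[i].key, key)) return opts[i].val;
    return def;                                      /* fall back to the default        */
}

int main(void)
{
    kvp opts[32];
    int n = read_data_cfg_simple("cfg/voc.data", opts, 32);
    printf("classes = %d\n", atoi(option_find_str_simple(opts, n, "classes", "20")));
    printf("train   = %s\n", option_find_str_simple(opts, n, "train", "data/train.list"));
    printf("backup  = %s\n", option_find_str_simple(opts, n, "backup", "/backup/"));
    return 0;
}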
Next, let's take a look at the yolo-voc.cfg file (note: also modified):
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64
subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation=1.5
exposure=1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches=80200
policy=steps
steps=40000,60000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

######

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[route]
layers=-9

[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky

[reorg]
stride=2

[route]
layers=-1,-4

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=35        # modified here
activation=linear

[region]
anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
bias_match=1
classes=2         # modified: number of classes
coords=4
num=5
softmax=1
jitter=.3
rescore=1
object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1
absolute=1
thresh=.6
random=1
Here, the [net] section holds the hyperparameter settings of the network, and everything after it describes the YOLOv2 network structure.
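The two "modified" entries are linked: for the YOLOv2 [region] layer, the final 1x1 convolution has to output num * (classes + coords + 1) filters, so setting classes to 2 with num = 5 anchors and coords = 4 box values forces filters to 35. A quick check:

#include <stdio.h>

int main(void)
{
    /* YOLOv2 [region] layer: each of the `num` anchors predicts
     * `coords` box values, 1 objectness score and `classes` class scores. */
    int num = 5, coords = 4, classes = 2;          /* values from the modified cfg above */
    int filters = num * (classes + coords + 1);    /* 5 * (2 + 4 + 1) = 35               */
    printf("filters = %d\n", filters);
    return 0;
}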