Preface
On June 30, 2017, the YOLO source on GitHub was updated and the locations of many files changed. This caused me a lot of trouble when I used YOLO v2 to train on my own data (many of the solutions found on the Web were incomplete). From this experience I came to appreciate how important it is to understand the source code in order to train on your own data successfully, so I decided to walk through the YOLO v2 code for everyone, to make the official YOLO training procedure easier to understand. (Because this is only a walk-through of how the code is organized, the source itself is not explained in detail.)
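The analysis begins with the YOLO training command, for example `./darknet detector train cfg/voc.data cfg/yolo-voc.cfg`, optionally followed by a pretrained weights file (note that the YOLO/Darknet source is written in C, not C++):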
From this command we can read off the values of the argv[] parameter of YOLO's main function: argv[0] = darknet, argv[1] = detector, argv[2] = train, ... (and so on for the rest). We can also tell that YOLO's main function must be in examples/darknet.c, so let us take a look at it:
/*
 * Program entry point (examples/darknet.c).
 *
 * Selects the GPU (from "-i" / "-nogpu"), then dispatches on argv[1] to the
 * matching sub-command. Always returns 0, including when argv[1] is missing
 * or is not a recognized sub-command (a message is printed to stderr).
 */
int main(int argc, char **argv)
{
    //test_resize("data/bad.jpg");
    //test_box();
    //test_convolutional_layer();
    if(argc < 2){
        fprintf(stderr, "usage: %s <function>\n", argv[0]);
        return 0;
    }
    gpu_index = find_int_arg(argc, argv, "-i", 0);
    if(find_arg(argc, argv, "-nogpu")) {
        gpu_index = -1;
    }

#ifndef GPU
    /* Built without GPU support: always use index -1. */
    gpu_index = -1;
#else
    if(gpu_index >= 0){
        cuda_set_device(gpu_index);
    }
#endif

    if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "voxel")){
        run_voxel(argc, argv);
    } else if (0 == strcmp(argv[1], "super")){
        run_super(argc, argv);
    } else if (0 == strcmp(argv[1], "lsd")){
        run_lsd(argc, argv);
    } else if (0 == strcmp(argv[1], "detector")){
        /* The branch taken by "./darknet detector train ...". */
        run_detector(argc, argv);
    } else if (0 == strcmp(argv[1], "detect")){
        float thresh = find_float_arg(argc, argv, "-thresh", .24);
        char *filename = (argc > 4) ? argv[4] : 0;
        char *outfile = find_char_arg(argc, argv, "-out", 0);
        int fullscreen = find_arg(argc, argv, "-fullscreen");
        test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
    } else if (0 == strcmp(argv[1], "cifar")){
        run_cifar(argc, argv);
    } else if (0 == strcmp(argv[1], "go")){
        run_go(argc, argv);
    } else if (0 == strcmp(argv[1], "rnn")){
        run_char_rnn(argc, argv);
    } else if (0 == strcmp(argv[1], "vid")){
        run_vid_rnn(argc, argv);
    } else if (0 == strcmp(argv[1], "coco")){
        run_coco(argc, argv);
    } else if (0 == strcmp(argv[1], "classify")){
        predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
    } else if (0 == strcmp(argv[1], "classifier")){
        run_classifier(argc, argv);
    } else if (0 == strcmp(argv[1], "regressor")){
        run_regressor(argc, argv);
    } else if (0 == strcmp(argv[1], "segmenter")){
        run_segmenter(argc, argv);
    } else if (0 == strcmp(argv[1], "art")){
        run_art(argc, argv);
    } else if (0 == strcmp(argv[1], "tag")){
        run_tag(argc, argv);
    } else if (0 == strcmp(argv[1], "compare")){
        run_compare(argc, argv);
    } else if (0 == strcmp(argv[1], "dice")){
        run_dice(argc, argv);
    } else if (0 == strcmp(argv[1], "writing")){
        run_writing(argc, argv);
    } else if (0 == strcmp(argv[1], "3d")){
        composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
    } else if (0 == strcmp(argv[1], "test")){
        test_resize(argv[2]);
    } else if (0 == strcmp(argv[1], "captcha")){
        run_captcha(argc, argv);
    } else if (0 == strcmp(argv[1], "nightmare")){
        run_nightmare(argc, argv);
    } else if (0 == strcmp(argv[1], "rgbgr")){
        rgbgr_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "reset")){
        reset_normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "denormalize")){
        denormalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "statistics")){
        statistics_net(argv[2], argv[3]);
    } else if (0 == strcmp(argv[1], "normalize")){
        normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "rescale")){
        rescale_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "ops")){
        operations(argv[2]);
    } else if (0 == strcmp(argv[1], "speed")){
        speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
    } else if (0 == strcmp(argv[1], "oneoff")){
        oneoff(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "oneoff2")){
        oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
    } else if (0 == strcmp(argv[1], "partial")){
        partial(argv[2], argv[3], argv[4], atoi(argv[5]));
    } else if (0 == strcmp(argv[1], "average")){
        /* Unreachable: "average" is already handled by the first branch. */
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "visualize")){
        visualize(argv[2], (argc > 3) ? argv[3] : 0);
    } else if (0 == strcmp(argv[1], "mkimg")){
        mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
    } else if (0 == strcmp(argv[1], "imtest")){
        test_resize(argv[2]);
    } else {
        fprintf(stderr, "Not an option: %s\n", argv[1]);
    }
    return 0;
}
It is easy to see that main simply tests argv[1] and starts a different routine depending on its content. Continuing with the training command, argv[1] = detector, so the function called is run_detector, which is located at the end of examples/detector.c. Let's take a look at this function:
void Run_detector (int argc, char **argv) {char *prefix = Find_char_arg (argc, argv, "-prefix", 0);
float Thresh = Find_float_arg (argc, argv, "-thresh",. 24);
float Hier_thresh = Find_float_arg (argc, argv, "-hier",. 5);
int cam_index = Find_int_arg (argc, argv, "-C", 0);
int frame_skip = Find_int_arg (argc, argv, "-S", 0);
int avg = Find_int_arg (argc, argv, "-avg", 3); if (ARGC < 4) {fprintf (stderr, "Usage:%s%s [train/test/valid] [CFG] [Weights (optional)]\n", Argv[0], argv[1])
;
Return
} Char *gpu_list = Find_char_arg (argc, argv, "-gpus", 0);
Char *outfile = Find_char_arg (argc, argv, "-out", 0);
int *gpus = 0;
int GPU = 0;
int Ngpus = 0;
if (gpu_list) {printf ("%s\n", gpu_list);
int len = strlen (gpu_list);
Ngpus = 1;
int i;
for (i = 0; i < len; ++i) {if (gpu_list[i] = = ', ') ++ngpus;
} GPUs = Calloc (Ngpus, sizeof (int)); for (i = 0; i < nGPUs
++i) {Gpus[i] = atoi (gpu_list);
Gpu_list = STRCHR (gpu_list, ', ') +1;
}} else {GPU = Gpu_index;
GPUs = &gpu;
Ngpus = 1;
} int clear = Find_arg (argc, argv, "-clear");
int fullscreen = Find_arg (argc, argv, "-fullscreen");
int width = Find_int_arg (argc, argv, "-W", 0);
int height = Find_int_arg (argc, argv, "-H", 0);
int fps = Find_int_arg (argc, argv, "-fps", 0);
Char *datacfg = argv[3];
Char *cfg = argv[4]; Char *weights = (argc > 5)?
ARGV[5]: 0; Char *filename = (argc > 6)?
ARGV[6]: 0; if (0==strcmp (argv[2], "test")) Test_detector (datacfg, CFG, weights, filename, thresh, Hier_thresh, outfile, fullscreen)
;
else if (0==strcmp (argv[2], "Train")) Train_detector (datacfg, CFG, weights, GPUs, Ngpus, clear);
else if (0==strcmp (argv[2], "valid")) Validate_detector (datacfg, CFG, Weights, outfile); else if (0==strcmp (argv[2], "Valid2")) Validate_detector_flip (Datacfg, CFG, weights, outfile);
else if (0==strcmp (argv[2], "recall")) Validate_detector_recall (cfg, weights);
else if (0==strcmp (Argv[2], "demo")) {list *options = Read_data_cfg (datacfg);
int classes = Option_find_int (options, "classes", 2);
Char *name_list = option_find_str (options, "names", "data/names.list");
Char **names = get_labels (name_list); Demo (cfg, Weights, thresh, cam_index, filename, names, classes, Frame_skip, prefix, AVG, Hier_thresh, width, height, fps,
fullscreen); }
}
Here the main role of run_detector is to call different functions according to the values in argv[]. The GPU handling, thresholds and so on can be ignored for now; what matters most is the value of argv[2], which selects among functions such as test_detector and train_detector. These functions are all defined in detector.c, and their names tell us what they do. Following the training command from before, argv[2] = train, so let's take a look at the train_detector function (note: I have modified part of it, so this is not the original code):
void Train_detector (char *datacfg, Char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) {List *options =
Read_data_cfg (DATACFG); Char *train_images = option_find_str (options, "Train", "Scripts/train.txt"); Training set Path char *backup_directory = OPTION_FIND_STR (options, "Backup", "/backup/");
Backup Training result Path Srand (time (0));
Char *base = basecfg (cfgfile);
printf ("%s\n", base);
float Avg_loss =-1;
Network *nets = calloc (Ngpus, sizeof (network));
Srand (Time (0));
int seed = rand ();
int i; for (i = 0; i < Ngpus; ++i) {srand (seed); #ifdef GPU Cuda_set_device (Gpus[i]); #endif nets[i] = Load_network (Cfgfile, Weightfile, clear);
Load network nets[i].learning_rate *= Ngpus;
} srand (Time (0));
Network net = Nets[0];
int IMGs = Net.batch * net.subdivisions * NGPUS;
printf ("Learning rate:%g, Momentum:%g, Decay:%g\n", Net.learning_rate, Net.momentum, Net.decay);
Data train, buffer; Layer L = net.layers[net.n-1];
int classes = l.classes;
float jitter = l.jitter;
List *plist = Get_paths (train_images);
int N = plist->size;
Char **paths = (char * *) List_to_array (plist);
Load_args args = {0};
ARGS.W = NET.W;
Args.h = net.h;
Args.paths = paths;
ARGS.N = IMGs;
ARGS.M = plist->size;
args.classes = classes;
Args.jitter = jitter;
Args.num_boxes = l.max_boxes;
ARGS.D = &buffer;
Args.type = Detection_data;
Args.threads = 8;
Args.angle = Net.angle;
Args.exposure = net.exposure;
Args.saturation = net.saturation;
Args.hue = Net.hue;
pthread_t load_thread = Load_data (args);
clock_t time;
int count = 0; while (I*imgs < n*120) {while (Get_current_batch (net) < Net.max_batches) {if (L.random && count++
%10 = = 0) {printf ("resizing\n");
int Dim = (rand ()% 10 + 10) * 32; if (Get_current_batch (net) +200 > Net.max_Batches) Dim = 608;
int Dim = (rand ()% 4 + 16) * 32;
printf ("%d\n", dim);
ARGS.W = Dim;
Args.h = Dim;
Pthread_join (load_thread, 0);
train = buffer;
Free_data (train);
Load_thread = Load_data (args);
for (i = 0; i < Ngpus; ++i) {resize_network (nets + I, dim, dim);
} net = Nets[0];
} time=clock ();
Pthread_join (load_thread, 0);
train = buffer;
Load_thread = Load_data (args);
/* int k;
for (k = 0; k < l.max_boxes; ++k) {Box b = Float_to_box (train.y.vals[10] + 1 + k*5);
if (!b.x) break;
printf ("Loaded:%f%f%f%f\n", b.x, B.y, B.W, b.h);
} */* int zz; for (zz = 0; zz < train. X.cols; ++ZZ) {Image im = Float_to_image (NET.W, Net.h, 3, train.
X.VALS[ZZ]);
int k; Fork = 0; K < l.max_boxes;
++k) {Box b = Float_to_box (Train.y.vals[zz] + k*5);
printf ("%f%f%f%f\n", b.x, B.y, B.W, b.h);
Draw_bbox (IM, B, 1, 1,0,0);
} show_image (IM, "truth11");
Cvwaitkey (0);
Save_image (IM, "truth11");
} */printf ("Loaded:%lf seconds\n", SEC (Clock ()-time));
Time=clock ();
float loss = 0;
#ifdef GPU if (Ngpus = = 1) {loss = Train_network (NET, train);
} else {loss = Train_networks (nets, Ngpus, train, 4);
} #else loss = Train_network (NET, train);
#endif if (Avg_loss < 0) Avg_loss = loss;
Avg_loss = avg_loss*.9 + loss*.1;
i = get_current_batch (net); printf ("%ld:%f,%f avg,%f rate,%lf seconds,%d images\n", Get_current_batch (net), loss, Avg_loss, get_current_rate (NET)
, SEC (Clock ()-time), I*imgs); if (i%1000==0) {#ifdef GPU IF (Ngpus! = 1) sync_nets (nets, Ngpus, 0);
#endif Char buff[256];
sprintf (Buff, "%s/%s.backup", backup_directory, Base);
Save_weights (NET, buff); } if (I%10000==0 | | (i < && i%100 = = 0))
{#ifdef GPU if (ngpus! = 1) sync_nets (nets, Ngpus, 0); #endif Char buff[256];
sprintf (Buff, "%s/%s_%d.weights", backup_directory, Base, i);
Save_weights (NET, buff);
} free_data (train);
} #ifdef GPU if (ngpus! = 1) sync_nets (nets, Ngpus, 0);
#endif Char buff[256];
sprintf (Buff, "%s/%s_final.weights", backup_directory, Base);
Save_weights (NET, buff); }
The parts to focus on here are read_data_cfg (row 7 of the original listing), train_images (row 8), backup_directory (row 9) and the load_network call (row 25). The datacfg parameter of read_data_cfg is passed in by run_detector as argv[3], which in this case is voc.data. train_images specifies the path to the list of images to be trained on. backup_directory specifies the path where the trained weights are saved. load_network loads the network structure and parameters to be trained; as run_detector shows, its cfgfile parameter is argv[4], which in our case is yolo-voc.cfg.
Here we take a look at cfg/voc.data (note: this is my modified version, not the original):
classes= 2
train  = /home/iair339-04/darknet/scripts/train.txt
valid  = /home/iair339-04/darknet/scripts/2007_test.txt
names = data/kitti.names
backup = backup
Here you can see that voc.data specifies the number of classes (classes), the training set path (train), the test set path (valid), the file of class names (names) and the directory where weights are backed up (backup). Easy!
Next, let's take a look at the yolo-voc.cfg file (note: modified):
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
steps=40000,60000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

#######

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[route]
layers=-9

[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky

[reorg]
stride=2

[route]
layers=-1,-4

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=35 # modified here
activation=linear

[region]
anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
bias_match=1
classes=2 # number of classes modified here
coords=4
num=5
softmax=1
jitter=.3
rescore=1

object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1

absolute=1
thresh = .6
random=1
Here the [net] section holds the network's hyper-parameter settings, and the sections after it define the YOLO v2 network structure.
Conclusion
Here we have used the training command to give a simple analysis of how YOLO v2 works, mainly so that everyone can train on their own data more conveniently. If you want to learn more about the YOLO source code, you will still need to read the source carefully.