1. During YOLO training, the main function for loading image data is load_data_region() (the listing below shows the corresponding load_data_detection() routine). It performs a lot of image preprocessing, such as random exposure adjustment, image cropping, and other augmentation operations. The specific implementation is as follows:
DATA.C data load_data_detection (int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hu
E, float saturation, float exposure) {char **random_paths = get_random_paths (paths, N, m);
int i;
Data D = {0};
D.shallow = 0;
Each row here is stored with a picture data, where n is batch size d.x.rows = N;
D.x.vals = calloc (d.x.rows, sizeof (float*));
D.x.cols = h*w*3;
Here y is the label d.y = Make_matrix (n, 5*boxes);
for (i = 0; i < n; ++i) {//load image Image orig = Load_image_color (Random_paths[i], 0, 0);
int oh = orig.h;
int ow = ORIG.W;
/* Here is to add jitter interference to the data, improve the network generalization ability (in fact, is crop, data augmentation of one).
The jitter=0.2 of the configuration file, the height of the width is clipped or the original width of the height is increased by 1/5.
*/int DW = (ow*jitter);
int dh = (oh*jitter);
Here to produce a random value int pleft = Rand_uniform (-DW, DW);
int pright = Rand_uniform (-DW, DW);
int ptop = Rand_uniform (-DH, DH);
int pbot = Rand_uniform (-DH, DH);
int swidth = Ow-pleft-pright; int sheight = Oh-ptop-pbot;
The scale calculated here is to calculate the position area of the actual sample after the jitter.
float SX = (float) swidth/ow;
Float sy = (float) Sheight/oh;
int flip = Random_gen ()%2;
Crop the image cropped = Crop_image (orig, Pleft, Ptop, Swidth, sheight);
float dx = ((float) pleft/ow)/sx;
float dy = ((float) ptop/oh)/sy;
The cropped image is normalized to 416*416 image sized = Resize_image (cropped, W, h);
if (flip) flip_image (sized);
Adjust random_distort_image (sized, hue, saturation, exposure) for the image into hue, exposure, etc.
D.x.vals[i] = Sized.data;//The label of the corresponding data is read and the image coordinates are restored. Its specific implementation see its Code implementation fill_truth_detection (Random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);Free_image (orig);
Free_image (sized);
Free_image (cropped);
} free (random_paths);
return D; }
Code that restores the label coordinates of a training sample:
DATA.C void Fill_truth_region (char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float
SX, float sy) {char labelpath[4096];
Find_replace (Path, "Images", "labels", Labelpath);
Find_replace (Labelpath, "jpegimages", "labels", Labelpath);
Find_replace (Labelpath, ". jpg", ". txt", labelpath);
Find_replace (Labelpath, ". png", ". txt", labelpath); Find_replace (Labelpath, ".
JPG ",". txt ", labelpath); Find_replace (Labelpath, ".
JPEG ",". txt ", labelpath);
int count = 0;
Box_label *boxes = read_boxes (Labelpath, &count);
Randomize_boxes (boxes, count);
Correct_boxes (boxes, count, dx, DY, sx, SY, flip);
float x,y,w,h;
int id;
int i;
for (i = 0; i < count; ++i) {x = boxes[i].x;
y = boxes[i].y;
W = BOXES[I].W;
h = boxes[i].h;
id = boxes[i].id;
if (W <. Continue | | h <.);
int col = (int) (x*num_boxes); int row = (int) (y*num_boxes);
x = X*num_boxes-col;
y = Y*num_boxes-row;
int index = (col+row*num_boxes) * (5+classes);
if (Truth[index]) continue;
truth[index++] = 1;
if (ID < classes) truth[index+id] = 1;
Index + = classes;
truth[index++] = x;
truth[index++] = y;
truth[index++] = W;
truth[index++] = h;
} free (boxes);
}