constexpr const int pool_size = 3;
Const float32x4_t Top_data = VLD1Q_F32 (reinterpret_cast<const float *> (input_top_ptr + input.offset ()));
Const float32x4_t Middle_data = VLD1Q_F32 (reinterpret_cast<const float *> (input_middle_ptr + input.offset ()));
Const float32x4_t Bottom_data = VLD1Q_F32 (reinterpret_cast<const float *> (input_bottom_ptr + input.offset ()));
float32x2_t res = {}; if (Pooling_type = = Poolingtype::avg) {//Calculate scale float scale = Calc
Ulate_avg_scale (ID, pool_size, upper_bound_w, Upper_bound_h, pool_pad_x, Pool_pad_y, pool_stride_x, pool_stride_y);
Const float32x2_t SCALE_V = VDUP_N_F32 (scale);
Perform Pooling const float32x4_t Sum_data = VADDQ_F32 (Vaddq_f32 (Top_data, Bottom_data), middle_data); res = VPADD_F32 (Vget_high_f32 (Vsetq_lane_f32 (0.F, Sum_data, 3)), VGET_LOW_F32(Sum_data));
res = VMUL_F32 (VPADD_F32 (res, res), SCALE_V);//Get 4 largest float} else {
Const float32x4_t Max_data = VMAXQ_F32 (Vmaxq_f32 (Top_data, Bottom_data), middle_data); res = VPMAX_F32 (Vget_high_f32 (Vsetq_lane_f32 (-std::numeric_limits<float>::max (), Max_data, 3)
), Vget_low_f32 (Max_data));
res = VPMAX_F32 (res, RES); } * (Reinterpret_cast<float *> (Output.ptr ())) = Vget_lane_f32 (res, 0);
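3x3 pooling first loads four consecutive floats from each of the three rows (top, middle and bottom) separately.
Because it is a 3x3 kernel, only the first three of the four loaded values in each row take part in the reduction.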
vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)
This call overwrites the fourth lane (index 3) of max_data with the most negative finite float, so that it can never be selected as the maximum.
vget_high_f32 and vget_low_f32 split max_data into its upper and lower halves; vpmax_f32 then takes the maximum within each half, and the result is of type float32x2_t.
A second vpmax_f32 compares the two remaining values to obtain the final maximum.
vget_lane_f32(res, 0) extracts the value in lane 0 of res.