[453] | 1 | #include <stdlib.h>
|
---|
| 2 | #include <math.h>
|
---|
| 3 | #include <stdio.h>
|
---|
| 4 | #include "region_layer.h"
|
---|
| 5 |
|
---|
/*
 * Axis-aligned box stored as a centre point plus extent.
 * overlap() treats (x, w) and (y, h) as centre/size pairs, so the box spans
 * [x - w/2, x + w/2] horizontally and [y - h/2, y + h/2] vertically.
 */
typedef struct
{
    float x, y; /* centre coordinates */
    float w, h; /* width and height */
} box_t;
|
---|
| 13 |
|
---|
/*
 * Sort key for the NMS pass: names one box (index), the class column that is
 * currently being ranked, and the shared probability table to look it up in.
 */
typedef struct
{
    int     index;  /* row in probs / position in the box array */
    int     class;  /* class column compared by nms_comparator() */
    float **probs;  /* probs[index][class] is the score being sorted on */
} sortable_box_t;
|
---|
| 20 |
|
---|
| 21 |
|
---|
| 22 | int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height)
|
---|
| 23 | {
|
---|
[458] | 24 | int flag = 0;
|
---|
[453] | 25 |
|
---|
[458] | 26 | rl->coords = 4;
|
---|
| 27 | rl->image_width = 320;
|
---|
| 28 | rl->image_height = 240;
|
---|
[453] | 29 |
|
---|
[458] | 30 | rl->classes = channels / 5 - 5;
|
---|
| 31 | rl->net_width = origin_width;
|
---|
| 32 | rl->net_height = origin_height;
|
---|
| 33 | rl->layer_width = width;
|
---|
| 34 | rl->layer_height = height;
|
---|
| 35 | rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number);
|
---|
| 36 | rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1));
|
---|
[453] | 37 |
|
---|
[458] | 38 | rl->output = malloc(rl->output_number * sizeof(float));
|
---|
| 39 | if (rl->output == NULL)
|
---|
| 40 | {
|
---|
| 41 | flag = -1;
|
---|
| 42 | goto malloc_error;
|
---|
| 43 | }
|
---|
| 44 | rl->boxes = malloc(rl->boxes_number * sizeof(box_t));
|
---|
| 45 | if (rl->boxes == NULL)
|
---|
| 46 | {
|
---|
| 47 | flag = -2;
|
---|
| 48 | goto malloc_error;
|
---|
| 49 | }
|
---|
| 50 | rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float));
|
---|
| 51 | if (rl->probs_buf == NULL)
|
---|
| 52 | {
|
---|
| 53 | flag = -3;
|
---|
| 54 | goto malloc_error;
|
---|
| 55 | }
|
---|
| 56 | rl->probs = malloc(rl->boxes_number * sizeof(float *));
|
---|
| 57 | if (rl->probs == NULL)
|
---|
| 58 | {
|
---|
| 59 | flag = -4;
|
---|
| 60 | goto malloc_error;
|
---|
| 61 | }
|
---|
| 62 | for (uint32_t i = 0; i < rl->boxes_number; i++)
|
---|
| 63 | rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]);
|
---|
| 64 | return 0;
|
---|
[453] | 65 | malloc_error:
|
---|
[458] | 66 | free(rl->output);
|
---|
| 67 | free(rl->boxes);
|
---|
| 68 | free(rl->probs_buf);
|
---|
| 69 | free(rl->probs);
|
---|
| 70 | return flag;
|
---|
[453] | 71 | }
|
---|
| 72 |
|
---|
| 73 | void region_layer_deinit(region_layer_t *rl)
|
---|
| 74 | {
|
---|
[458] | 75 | free(rl->output);
|
---|
| 76 | free(rl->boxes);
|
---|
| 77 | free(rl->probs_buf);
|
---|
| 78 | free(rl->probs);
|
---|
[453] | 79 | }
|
---|
| 80 |
|
---|
| 81 | static inline float sigmoid(float x)
|
---|
| 82 | {
|
---|
[458] | 83 | return 1.f / (1.f + expf(-x));
|
---|
[453] | 84 | }
|
---|
| 85 |
|
---|
| 86 | static void activate_array(region_layer_t *rl, int index, int n)
|
---|
| 87 | {
|
---|
[458] | 88 | float *output = &rl->output[index];
|
---|
| 89 | float *input = &rl->input[index];
|
---|
[453] | 90 |
|
---|
[458] | 91 | for (int i = 0; i < n; ++i)
|
---|
| 92 | output[i] = sigmoid(input[i]);
|
---|
[453] | 93 | }
|
---|
| 94 |
|
---|
| 95 | static int entry_index(region_layer_t *rl, int location, int entry)
|
---|
| 96 | {
|
---|
[458] | 97 | int wh = rl->layer_width * rl->layer_height;
|
---|
| 98 | int n = location / wh;
|
---|
| 99 | int loc = location % wh;
|
---|
[453] | 100 |
|
---|
[458] | 101 | return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc;
|
---|
[453] | 102 | }
|
---|
| 103 |
|
---|
| 104 | static void softmax(region_layer_t *rl, float *input, int n, int stride, float *output)
|
---|
| 105 | {
|
---|
[458] | 106 | int i;
|
---|
| 107 | float diff;
|
---|
| 108 | float e;
|
---|
| 109 | float sum = 0;
|
---|
| 110 | float largest_i = input[0];
|
---|
[453] | 111 |
|
---|
[458] | 112 | for (i = 0; i < n; ++i)
|
---|
| 113 | {
|
---|
| 114 | if (input[i * stride] > largest_i)
|
---|
| 115 | largest_i = input[i * stride];
|
---|
| 116 | }
|
---|
[453] | 117 |
|
---|
[458] | 118 | for (i = 0; i < n; ++i) {
|
---|
| 119 | diff = input[i * stride] - largest_i;
|
---|
| 120 | e = expf(diff);
|
---|
| 121 | sum += e;
|
---|
| 122 | output[i * stride] = e;
|
---|
| 123 | }
|
---|
| 124 | for (i = 0; i < n; ++i)
|
---|
| 125 | output[i * stride] /= sum;
|
---|
[453] | 126 | }
|
---|
| 127 |
|
---|
| 128 | static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride, float *output)
|
---|
| 129 | {
|
---|
[458] | 130 | int g, b;
|
---|
[453] | 131 |
|
---|
[458] | 132 | for (b = 0; b < batch; ++b) {
|
---|
| 133 | for (g = 0; g < groups; ++g)
|
---|
| 134 | softmax(rl, input + b * batch_offset + g, n, stride, output + b * batch_offset + g);
|
---|
| 135 | }
|
---|
[453] | 136 | }
|
---|
| 137 |
|
---|
| 138 | static void forward_region_layer(region_layer_t *rl)
|
---|
| 139 | {
|
---|
[458] | 140 | int index;
|
---|
[453] | 141 |
|
---|
[458] | 142 | for (index = 0; index < rl->output_number; index++)
|
---|
| 143 | rl->output[index] = rl->input[index];
|
---|
[453] | 144 |
|
---|
[458] | 145 | for (int n = 0; n < rl->anchor_number; ++n)
|
---|
| 146 | {
|
---|
| 147 | index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0);
|
---|
| 148 | activate_array(rl, index, 2 * rl->layer_width * rl->layer_height);
|
---|
| 149 | index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4);
|
---|
| 150 | activate_array(rl, index, rl->layer_width * rl->layer_height);
|
---|
| 151 | }
|
---|
[453] | 152 |
|
---|
[458] | 153 | index = entry_index(rl, 0, rl->coords + 1);
|
---|
| 154 | softmax_cpu(rl, rl->input + index, rl->classes, rl->anchor_number,
|
---|
| 155 | rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height,
|
---|
| 156 | rl->layer_width * rl->layer_height, rl->output + index);
|
---|
[453] | 157 | }
|
---|
| 158 |
|
---|
| 159 | static void correct_region_boxes(region_layer_t *rl, box_t *boxes)
|
---|
| 160 | {
|
---|
[458] | 161 | uint32_t net_width = rl->net_width;
|
---|
| 162 | uint32_t net_height = rl->net_height;
|
---|
| 163 | uint32_t image_width = rl->image_width;
|
---|
| 164 | uint32_t image_height = rl->image_height;
|
---|
| 165 | uint32_t boxes_number = rl->boxes_number;
|
---|
| 166 | int new_w = 0;
|
---|
| 167 | int new_h = 0;
|
---|
[453] | 168 |
|
---|
[458] | 169 | if (((float)net_width / image_width) <
|
---|
| 170 | ((float)net_height / image_height)) {
|
---|
| 171 | new_w = net_width;
|
---|
| 172 | new_h = (image_height * net_width) / image_width;
|
---|
| 173 | } else {
|
---|
| 174 | new_h = net_height;
|
---|
| 175 | new_w = (image_width * net_height) / image_height;
|
---|
| 176 | }
|
---|
| 177 | for (int i = 0; i < boxes_number; ++i) {
|
---|
| 178 | box_t b = boxes[i];
|
---|
[453] | 179 |
|
---|
[458] | 180 | b.x = (b.x - (net_width - new_w) / 2. / net_width) /
|
---|
| 181 | ((float)new_w / net_width);
|
---|
| 182 | b.y = (b.y - (net_height - new_h) / 2. / net_height) /
|
---|
| 183 | ((float)new_h / net_height);
|
---|
| 184 | b.w *= (float)net_width / new_w;
|
---|
| 185 | b.h *= (float)net_height / new_h;
|
---|
| 186 | boxes[i] = b;
|
---|
| 187 | }
|
---|
[453] | 188 | }
|
---|
| 189 |
|
---|
| 190 | static box_t get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
|
---|
| 191 | {
|
---|
[458] | 192 | volatile box_t b;
|
---|
[453] | 193 |
|
---|
[458] | 194 | b.x = (i + x[index + 0 * stride]) / w;
|
---|
| 195 | b.y = (j + x[index + 1 * stride]) / h;
|
---|
| 196 | b.w = expf(x[index + 2 * stride]) * biases[2 * n] / w;
|
---|
| 197 | b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1] / h;
|
---|
| 198 | return b;
|
---|
[453] | 199 | }
|
---|
| 200 |
|
---|
| 201 | static void get_region_boxes(region_layer_t *rl, float *predictions, float **probs, box_t *boxes)
|
---|
| 202 | {
|
---|
[458] | 203 | uint32_t layer_width = rl->layer_width;
|
---|
| 204 | uint32_t layer_height = rl->layer_height;
|
---|
| 205 | uint32_t anchor_number = rl->anchor_number;
|
---|
| 206 | uint32_t classes = rl->classes;
|
---|
| 207 | uint32_t coords = rl->coords;
|
---|
| 208 | float threshold = rl->threshold;
|
---|
[453] | 209 |
|
---|
[458] | 210 | for (int i = 0; i < layer_width * layer_height; ++i)
|
---|
| 211 | {
|
---|
| 212 | int row = i / layer_width;
|
---|
| 213 | int col = i % layer_width;
|
---|
[453] | 214 |
|
---|
[458] | 215 | for (int n = 0; n < anchor_number; ++n)
|
---|
| 216 | {
|
---|
| 217 | int index = n * layer_width * layer_height + i;
|
---|
[453] | 218 |
|
---|
[458] | 219 | for (int j = 0; j < classes; ++j)
|
---|
| 220 | probs[index][j] = 0;
|
---|
| 221 | int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords);
|
---|
| 222 | int box_index = entry_index(rl, n * layer_width * layer_height + i, 0);
|
---|
| 223 | float scale = predictions[obj_index];
|
---|
[453] | 224 |
|
---|
[458] | 225 | boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row,
|
---|
| 226 | layer_width, layer_height, layer_width * layer_height);
|
---|
[453] | 227 |
|
---|
[458] | 228 | float max = 0;
|
---|
[453] | 229 |
|
---|
[458] | 230 | for (int j = 0; j < classes; ++j)
|
---|
| 231 | {
|
---|
| 232 | int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j);
|
---|
| 233 | float prob = scale * predictions[class_index];
|
---|
[453] | 234 |
|
---|
[458] | 235 | probs[index][j] = (prob > threshold) ? prob : 0;
|
---|
| 236 | if (prob > max)
|
---|
| 237 | max = prob;
|
---|
| 238 | }
|
---|
| 239 | probs[index][classes] = max;
|
---|
| 240 | }
|
---|
| 241 | }
|
---|
| 242 | correct_region_boxes(rl, boxes);
|
---|
[453] | 243 | }
|
---|
| 244 |
|
---|
| 245 | static int nms_comparator(void *pa, void *pb)
|
---|
| 246 | {
|
---|
[458] | 247 | sortable_box_t a = *(sortable_box_t *)pa;
|
---|
| 248 | sortable_box_t b = *(sortable_box_t *)pb;
|
---|
| 249 | float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class];
|
---|
[453] | 250 |
|
---|
[458] | 251 | if (diff < 0)
|
---|
| 252 | return 1;
|
---|
| 253 | else if (diff > 0)
|
---|
| 254 | return -1;
|
---|
| 255 | return 0;
|
---|
[453] | 256 | }
|
---|
| 257 |
|
---|
/*
 * Signed length of the overlap between two 1-D intervals given as
 * centre/width pairs (x1, w1) and (x2, w2). The result is negative when the
 * intervals do not touch.
 */
static float overlap(float x1, float w1, float x2, float w2)
{
    const float start1 = x1 - w1 / 2;
    const float start2 = x2 - w2 / 2;
    const float end1 = x1 + w1 / 2;
    const float end2 = x2 + w2 / 2;
    float start = start2;
    float end = end2;

    /* The overlap begins at the later start ... */
    if (start1 > start2)
        start = start1;
    /* ... and finishes at the earlier end. */
    if (end1 < end2)
        end = end1;

    return end - start;
}
|
---|
| 269 |
|
---|
| 270 | static float box_intersection(box_t a, box_t b)
|
---|
| 271 | {
|
---|
[458] | 272 | float w = overlap(a.x, a.w, b.x, b.w);
|
---|
| 273 | float h = overlap(a.y, a.h, b.y, b.h);
|
---|
[453] | 274 |
|
---|
[458] | 275 | if (w < 0 || h < 0)
|
---|
| 276 | return 0;
|
---|
| 277 | return w * h;
|
---|
[453] | 278 | }
|
---|
| 279 |
|
---|
| 280 | static float box_union(box_t a, box_t b)
|
---|
| 281 | {
|
---|
[458] | 282 | float i = box_intersection(a, b);
|
---|
| 283 | float u = a.w * a.h + b.w * b.h - i;
|
---|
[453] | 284 |
|
---|
[458] | 285 | return u;
|
---|
[453] | 286 | }
|
---|
| 287 |
|
---|
| 288 | static float box_iou(box_t a, box_t b)
|
---|
| 289 | {
|
---|
[458] | 290 | return box_intersection(a, b) / box_union(a, b);
|
---|
[453] | 291 | }
|
---|
| 292 |
|
---|
| 293 | static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs)
|
---|
| 294 | {
|
---|
[458] | 295 | uint32_t boxes_number = rl->boxes_number;
|
---|
| 296 | uint32_t classes = rl->classes;
|
---|
| 297 | float nms_value = rl->nms_value;
|
---|
| 298 | int i, j, k;
|
---|
| 299 | sortable_box_t s[boxes_number];
|
---|
[453] | 300 |
|
---|
[458] | 301 | for (i = 0; i < boxes_number; ++i)
|
---|
| 302 | {
|
---|
| 303 | s[i].index = i;
|
---|
| 304 | s[i].class = 0;
|
---|
| 305 | s[i].probs = probs;
|
---|
| 306 | }
|
---|
[453] | 307 |
|
---|
[458] | 308 | for (k = 0; k < classes; ++k)
|
---|
| 309 | {
|
---|
| 310 | for (i = 0; i < boxes_number; ++i)
|
---|
| 311 | s[i].class = k;
|
---|
| 312 | qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator);
|
---|
| 313 | for (i = 0; i < boxes_number; ++i)
|
---|
| 314 | {
|
---|
| 315 | if (probs[s[i].index][k] == 0)
|
---|
| 316 | continue;
|
---|
| 317 | box_t a = boxes[s[i].index];
|
---|
[453] | 318 |
|
---|
[458] | 319 | for (j = i + 1; j < boxes_number; ++j)
|
---|
| 320 | {
|
---|
| 321 | box_t b = boxes[s[j].index];
|
---|
[453] | 322 |
|
---|
[458] | 323 | if (box_iou(a, b) > nms_value)
|
---|
| 324 | probs[s[j].index][k] = 0;
|
---|
| 325 | }
|
---|
| 326 | }
|
---|
| 327 | }
|
---|
[453] | 328 | }
|
---|
| 329 |
|
---|
/*
 * Index of the largest of the first n elements of a. The earliest index wins
 * on ties, and 0 is returned when n is 1 or less.
 */
static int max_index(float *a, int n)
{
    int best = 0;
    int i = 1;

    while (i < n)
    {
        /* Strict comparison keeps the first occurrence of the maximum. */
        if (a[i] > a[best])
            best = i;
        ++i;
    }
    return best;
}
|
---|
| 345 |
|
---|
| 346 | static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info)
|
---|
| 347 | {
|
---|
[458] | 348 | uint32_t obj_number = 0;
|
---|
| 349 | uint32_t image_width = rl->image_width;
|
---|
| 350 | uint32_t image_height = rl->image_height;
|
---|
| 351 | uint32_t boxes_number = rl->boxes_number;
|
---|
| 352 | float threshold = rl->threshold;
|
---|
| 353 | box_t *boxes = (box_t *)rl->boxes;
|
---|
| 354 |
|
---|
| 355 | for (int i = 0; i < rl->boxes_number; ++i)
|
---|
| 356 | {
|
---|
| 357 | int class = max_index(rl->probs[i], rl->classes);
|
---|
| 358 | float prob = rl->probs[i][class];
|
---|
[453] | 359 |
|
---|
[458] | 360 | if (prob > threshold)
|
---|
| 361 | {
|
---|
| 362 | box_t *b = boxes + i;
|
---|
| 363 | obj_info->obj[obj_number].x1 = b->x * image_width - (b->w * image_width / 2);
|
---|
| 364 | obj_info->obj[obj_number].y1 = b->y * image_height - (b->h * image_height / 2);
|
---|
| 365 | obj_info->obj[obj_number].x2 = b->x * image_width + (b->w * image_width / 2);
|
---|
| 366 | obj_info->obj[obj_number].y2 = b->y * image_height + (b->h * image_height / 2);
|
---|
| 367 | obj_info->obj[obj_number].class_id = class;
|
---|
| 368 | obj_info->obj[obj_number].prob = prob;
|
---|
| 369 | obj_number++;
|
---|
| 370 | }
|
---|
| 371 | }
|
---|
| 372 | obj_info->obj_number = obj_number;
|
---|
[453] | 373 | }
|
---|
| 374 |
|
---|
| 375 | void region_layer_run(region_layer_t *rl, obj_info_t *obj_info)
|
---|
| 376 | {
|
---|
[458] | 377 | forward_region_layer(rl);
|
---|
| 378 | get_region_boxes(rl, rl->output, rl->probs, rl->boxes);
|
---|
| 379 | do_nms_sort(rl, rl->boxes, rl->probs);
|
---|
| 380 | // region_layer_output(rl, obj_info);
|
---|
[453] | 381 | }
|
---|
| 382 |
|
---|
| 383 | void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback)
|
---|
| 384 | {
|
---|
[458] | 385 | uint32_t image_width = rl->image_width;
|
---|
| 386 | uint32_t image_height = rl->image_height;
|
---|
| 387 | float threshold = rl->threshold;
|
---|
| 388 | box_t *boxes = (box_t *)rl->boxes;
|
---|
[453] | 389 |
|
---|
[458] | 390 | for (int i = 0; i < rl->boxes_number; ++i)
|
---|
| 391 | {
|
---|
| 392 | int class = max_index(rl->probs[i], rl->classes);
|
---|
| 393 | float prob = rl->probs[i][class];
|
---|
[453] | 394 |
|
---|
[458] | 395 | if (prob > threshold)
|
---|
| 396 | {
|
---|
| 397 | box_t *b = boxes + i;
|
---|
| 398 | uint32_t x1 = b->x * image_width - (b->w * image_width / 2);
|
---|
| 399 | uint32_t y1 = b->y * image_height - (b->h * image_height / 2);
|
---|
| 400 | uint32_t x2 = b->x * image_width + (b->w * image_width / 2);
|
---|
| 401 | uint32_t y2 = b->y * image_height + (b->h * image_height / 2);
|
---|
| 402 | callback(x1, y1, x2, y2, class, prob);
|
---|
| 403 | }
|
---|
| 404 | }
|
---|
[453] | 405 | }
|
---|