1 | #include <stdlib.h>
|
---|
2 | #include <math.h>
|
---|
3 | #include <stdio.h>
|
---|
4 | #include "region_layer.h"
|
---|
5 |
|
---|
/* Axis-aligned box described by its centre point and its extents. */
typedef struct
{
    float x, y; /* centre coordinates */
    float w, h; /* width and height */
} box_t;
|
---|
13 |
|
---|
/* Sort record for the NMS pass: names one box and the class column it is ranked on. */
typedef struct
{
    int index;     /* row of this box in the probs table */
    int class;     /* class column used when comparing records */
    float **probs; /* per-box probability table shared by all records */
} sortable_box_t;
|
---|
20 |
|
---|
21 |
|
---|
22 | int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height)
|
---|
23 | {
|
---|
24 | int flag = 0;
|
---|
25 |
|
---|
26 | rl->coords = 4;
|
---|
27 | rl->image_width = 320;
|
---|
28 | rl->image_height = 240;
|
---|
29 |
|
---|
30 | rl->classes = channels / 5 - 5;
|
---|
31 | rl->net_width = origin_width;
|
---|
32 | rl->net_height = origin_height;
|
---|
33 | rl->layer_width = width;
|
---|
34 | rl->layer_height = height;
|
---|
35 | rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number);
|
---|
36 | rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1));
|
---|
37 |
|
---|
38 | rl->output = malloc(rl->output_number * sizeof(float));
|
---|
39 | if (rl->output == NULL)
|
---|
40 | {
|
---|
41 | flag = -1;
|
---|
42 | goto malloc_error;
|
---|
43 | }
|
---|
44 | rl->boxes = malloc(rl->boxes_number * sizeof(box_t));
|
---|
45 | if (rl->boxes == NULL)
|
---|
46 | {
|
---|
47 | flag = -2;
|
---|
48 | goto malloc_error;
|
---|
49 | }
|
---|
50 | rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float));
|
---|
51 | if (rl->probs_buf == NULL)
|
---|
52 | {
|
---|
53 | flag = -3;
|
---|
54 | goto malloc_error;
|
---|
55 | }
|
---|
56 | rl->probs = malloc(rl->boxes_number * sizeof(float *));
|
---|
57 | if (rl->probs == NULL)
|
---|
58 | {
|
---|
59 | flag = -4;
|
---|
60 | goto malloc_error;
|
---|
61 | }
|
---|
62 | for (uint32_t i = 0; i < rl->boxes_number; i++)
|
---|
63 | rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]);
|
---|
64 | return 0;
|
---|
65 | malloc_error:
|
---|
66 | free(rl->output);
|
---|
67 | free(rl->boxes);
|
---|
68 | free(rl->probs_buf);
|
---|
69 | free(rl->probs);
|
---|
70 | return flag;
|
---|
71 | }
|
---|
72 |
|
---|
73 | void region_layer_deinit(region_layer_t *rl)
|
---|
74 | {
|
---|
75 | free(rl->output);
|
---|
76 | free(rl->boxes);
|
---|
77 | free(rl->probs_buf);
|
---|
78 | free(rl->probs);
|
---|
79 | }
|
---|
80 |
|
---|
81 | static inline float sigmoid(float x)
|
---|
82 | {
|
---|
83 | return 1.f / (1.f + expf(-x));
|
---|
84 | }
|
---|
85 |
|
---|
86 | static void activate_array(region_layer_t *rl, int index, int n)
|
---|
87 | {
|
---|
88 | float *output = &rl->output[index];
|
---|
89 | float *input = &rl->input[index];
|
---|
90 |
|
---|
91 | for (int i = 0; i < n; ++i)
|
---|
92 | output[i] = sigmoid(input[i]);
|
---|
93 | }
|
---|
94 |
|
---|
95 | static int entry_index(region_layer_t *rl, int location, int entry)
|
---|
96 | {
|
---|
97 | int wh = rl->layer_width * rl->layer_height;
|
---|
98 | int n = location / wh;
|
---|
99 | int loc = location % wh;
|
---|
100 |
|
---|
101 | return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc;
|
---|
102 | }
|
---|
103 |
|
---|
104 | static void softmax(region_layer_t *rl, float *input, int n, int stride, float *output)
|
---|
105 | {
|
---|
106 | int i;
|
---|
107 | float diff;
|
---|
108 | float e;
|
---|
109 | float sum = 0;
|
---|
110 | float largest_i = input[0];
|
---|
111 |
|
---|
112 | for (i = 0; i < n; ++i)
|
---|
113 | {
|
---|
114 | if (input[i * stride] > largest_i)
|
---|
115 | largest_i = input[i * stride];
|
---|
116 | }
|
---|
117 |
|
---|
118 | for (i = 0; i < n; ++i) {
|
---|
119 | diff = input[i * stride] - largest_i;
|
---|
120 | e = expf(diff);
|
---|
121 | sum += e;
|
---|
122 | output[i * stride] = e;
|
---|
123 | }
|
---|
124 | for (i = 0; i < n; ++i)
|
---|
125 | output[i * stride] /= sum;
|
---|
126 | }
|
---|
127 |
|
---|
128 | static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride, float *output)
|
---|
129 | {
|
---|
130 | int g, b;
|
---|
131 |
|
---|
132 | for (b = 0; b < batch; ++b) {
|
---|
133 | for (g = 0; g < groups; ++g)
|
---|
134 | softmax(rl, input + b * batch_offset + g, n, stride, output + b * batch_offset + g);
|
---|
135 | }
|
---|
136 | }
|
---|
137 |
|
---|
138 | static void forward_region_layer(region_layer_t *rl)
|
---|
139 | {
|
---|
140 | int index;
|
---|
141 |
|
---|
142 | for (index = 0; index < rl->output_number; index++)
|
---|
143 | rl->output[index] = rl->input[index];
|
---|
144 |
|
---|
145 | for (int n = 0; n < rl->anchor_number; ++n)
|
---|
146 | {
|
---|
147 | index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0);
|
---|
148 | activate_array(rl, index, 2 * rl->layer_width * rl->layer_height);
|
---|
149 | index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4);
|
---|
150 | activate_array(rl, index, rl->layer_width * rl->layer_height);
|
---|
151 | }
|
---|
152 |
|
---|
153 | index = entry_index(rl, 0, rl->coords + 1);
|
---|
154 | softmax_cpu(rl, rl->input + index, rl->classes, rl->anchor_number,
|
---|
155 | rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height,
|
---|
156 | rl->layer_width * rl->layer_height, rl->output + index);
|
---|
157 | }
|
---|
158 |
|
---|
159 | static void correct_region_boxes(region_layer_t *rl, box_t *boxes)
|
---|
160 | {
|
---|
161 | uint32_t net_width = rl->net_width;
|
---|
162 | uint32_t net_height = rl->net_height;
|
---|
163 | uint32_t image_width = rl->image_width;
|
---|
164 | uint32_t image_height = rl->image_height;
|
---|
165 | uint32_t boxes_number = rl->boxes_number;
|
---|
166 | int new_w = 0;
|
---|
167 | int new_h = 0;
|
---|
168 |
|
---|
169 | if (((float)net_width / image_width) <
|
---|
170 | ((float)net_height / image_height)) {
|
---|
171 | new_w = net_width;
|
---|
172 | new_h = (image_height * net_width) / image_width;
|
---|
173 | } else {
|
---|
174 | new_h = net_height;
|
---|
175 | new_w = (image_width * net_height) / image_height;
|
---|
176 | }
|
---|
177 | for (int i = 0; i < boxes_number; ++i) {
|
---|
178 | box_t b = boxes[i];
|
---|
179 |
|
---|
180 | b.x = (b.x - (net_width - new_w) / 2. / net_width) /
|
---|
181 | ((float)new_w / net_width);
|
---|
182 | b.y = (b.y - (net_height - new_h) / 2. / net_height) /
|
---|
183 | ((float)new_h / net_height);
|
---|
184 | b.w *= (float)net_width / new_w;
|
---|
185 | b.h *= (float)net_height / new_h;
|
---|
186 | boxes[i] = b;
|
---|
187 | }
|
---|
188 | }
|
---|
189 |
|
---|
190 | static box_t get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
|
---|
191 | {
|
---|
192 | volatile box_t b;
|
---|
193 |
|
---|
194 | b.x = (i + x[index + 0 * stride]) / w;
|
---|
195 | b.y = (j + x[index + 1 * stride]) / h;
|
---|
196 | b.w = expf(x[index + 2 * stride]) * biases[2 * n] / w;
|
---|
197 | b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1] / h;
|
---|
198 | return b;
|
---|
199 | }
|
---|
200 |
|
---|
201 | static void get_region_boxes(region_layer_t *rl, float *predictions, float **probs, box_t *boxes)
|
---|
202 | {
|
---|
203 | uint32_t layer_width = rl->layer_width;
|
---|
204 | uint32_t layer_height = rl->layer_height;
|
---|
205 | uint32_t anchor_number = rl->anchor_number;
|
---|
206 | uint32_t classes = rl->classes;
|
---|
207 | uint32_t coords = rl->coords;
|
---|
208 | float threshold = rl->threshold;
|
---|
209 |
|
---|
210 | for (int i = 0; i < layer_width * layer_height; ++i)
|
---|
211 | {
|
---|
212 | int row = i / layer_width;
|
---|
213 | int col = i % layer_width;
|
---|
214 |
|
---|
215 | for (int n = 0; n < anchor_number; ++n)
|
---|
216 | {
|
---|
217 | int index = n * layer_width * layer_height + i;
|
---|
218 |
|
---|
219 | for (int j = 0; j < classes; ++j)
|
---|
220 | probs[index][j] = 0;
|
---|
221 | int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords);
|
---|
222 | int box_index = entry_index(rl, n * layer_width * layer_height + i, 0);
|
---|
223 | float scale = predictions[obj_index];
|
---|
224 |
|
---|
225 | boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row,
|
---|
226 | layer_width, layer_height, layer_width * layer_height);
|
---|
227 |
|
---|
228 | float max = 0;
|
---|
229 |
|
---|
230 | for (int j = 0; j < classes; ++j)
|
---|
231 | {
|
---|
232 | int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j);
|
---|
233 | float prob = scale * predictions[class_index];
|
---|
234 |
|
---|
235 | probs[index][j] = (prob > threshold) ? prob : 0;
|
---|
236 | if (prob > max)
|
---|
237 | max = prob;
|
---|
238 | }
|
---|
239 | probs[index][classes] = max;
|
---|
240 | }
|
---|
241 | }
|
---|
242 | correct_region_boxes(rl, boxes);
|
---|
243 | }
|
---|
244 |
|
---|
245 | static int nms_comparator(void *pa, void *pb)
|
---|
246 | {
|
---|
247 | sortable_box_t a = *(sortable_box_t *)pa;
|
---|
248 | sortable_box_t b = *(sortable_box_t *)pb;
|
---|
249 | float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class];
|
---|
250 |
|
---|
251 | if (diff < 0)
|
---|
252 | return 1;
|
---|
253 | else if (diff > 0)
|
---|
254 | return -1;
|
---|
255 | return 0;
|
---|
256 | }
|
---|
257 |
|
---|
/*
 * Signed length of the overlap between two 1-D intervals, each given by a
 * centre and a width. The result is negative when the intervals are
 * disjoint.
 */
static float overlap(float x1, float w1, float x2, float w2)
{
    const float half1 = w1 / 2;
    const float half2 = w2 / 2;
    float left = x2 - half2;
    float right = x2 + half2;

    /* Overlap starts at the larger left edge ... */
    if (x1 - half1 > left)
        left = x1 - half1;
    /* ... and ends at the smaller right edge. */
    if (x1 + half1 < right)
        right = x1 + half1;

    return right - left;
}
|
---|
269 |
|
---|
270 | static float box_intersection(box_t a, box_t b)
|
---|
271 | {
|
---|
272 | float w = overlap(a.x, a.w, b.x, b.w);
|
---|
273 | float h = overlap(a.y, a.h, b.y, b.h);
|
---|
274 |
|
---|
275 | if (w < 0 || h < 0)
|
---|
276 | return 0;
|
---|
277 | return w * h;
|
---|
278 | }
|
---|
279 |
|
---|
280 | static float box_union(box_t a, box_t b)
|
---|
281 | {
|
---|
282 | float i = box_intersection(a, b);
|
---|
283 | float u = a.w * a.h + b.w * b.h - i;
|
---|
284 |
|
---|
285 | return u;
|
---|
286 | }
|
---|
287 |
|
---|
288 | static float box_iou(box_t a, box_t b)
|
---|
289 | {
|
---|
290 | return box_intersection(a, b) / box_union(a, b);
|
---|
291 | }
|
---|
292 |
|
---|
293 | static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs)
|
---|
294 | {
|
---|
295 | uint32_t boxes_number = rl->boxes_number;
|
---|
296 | uint32_t classes = rl->classes;
|
---|
297 | float nms_value = rl->nms_value;
|
---|
298 | int i, j, k;
|
---|
299 | sortable_box_t s[boxes_number];
|
---|
300 |
|
---|
301 | for (i = 0; i < boxes_number; ++i)
|
---|
302 | {
|
---|
303 | s[i].index = i;
|
---|
304 | s[i].class = 0;
|
---|
305 | s[i].probs = probs;
|
---|
306 | }
|
---|
307 |
|
---|
308 | for (k = 0; k < classes; ++k)
|
---|
309 | {
|
---|
310 | for (i = 0; i < boxes_number; ++i)
|
---|
311 | s[i].class = k;
|
---|
312 | qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator);
|
---|
313 | for (i = 0; i < boxes_number; ++i)
|
---|
314 | {
|
---|
315 | if (probs[s[i].index][k] == 0)
|
---|
316 | continue;
|
---|
317 | box_t a = boxes[s[i].index];
|
---|
318 |
|
---|
319 | for (j = i + 1; j < boxes_number; ++j)
|
---|
320 | {
|
---|
321 | box_t b = boxes[s[j].index];
|
---|
322 |
|
---|
323 | if (box_iou(a, b) > nms_value)
|
---|
324 | probs[s[j].index][k] = 0;
|
---|
325 | }
|
---|
326 | }
|
---|
327 | }
|
---|
328 | }
|
---|
329 |
|
---|
/*
 * Index of the largest of the first n values of a. Ties go to the lowest
 * index. a[0] is always read, so a must hold at least one element.
 */
static int max_index(float *a, int n)
{
    int best = 0;
    float top = a[0];

    for (int k = 1; k < n; ++k) {
        if (!(a[k] > top))
            continue;
        top = a[k];
        best = k;
    }
    return best;
}
|
---|
345 |
|
---|
346 | static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info)
|
---|
347 | {
|
---|
348 | uint32_t obj_number = 0;
|
---|
349 | uint32_t image_width = rl->image_width;
|
---|
350 | uint32_t image_height = rl->image_height;
|
---|
351 | uint32_t boxes_number = rl->boxes_number;
|
---|
352 | float threshold = rl->threshold;
|
---|
353 | box_t *boxes = (box_t *)rl->boxes;
|
---|
354 |
|
---|
355 | for (int i = 0; i < rl->boxes_number; ++i)
|
---|
356 | {
|
---|
357 | int class = max_index(rl->probs[i], rl->classes);
|
---|
358 | float prob = rl->probs[i][class];
|
---|
359 |
|
---|
360 | if (prob > threshold)
|
---|
361 | {
|
---|
362 | box_t *b = boxes + i;
|
---|
363 | obj_info->obj[obj_number].x1 = b->x * image_width - (b->w * image_width / 2);
|
---|
364 | obj_info->obj[obj_number].y1 = b->y * image_height - (b->h * image_height / 2);
|
---|
365 | obj_info->obj[obj_number].x2 = b->x * image_width + (b->w * image_width / 2);
|
---|
366 | obj_info->obj[obj_number].y2 = b->y * image_height + (b->h * image_height / 2);
|
---|
367 | obj_info->obj[obj_number].class_id = class;
|
---|
368 | obj_info->obj[obj_number].prob = prob;
|
---|
369 | obj_number++;
|
---|
370 | }
|
---|
371 | }
|
---|
372 | obj_info->obj_number = obj_number;
|
---|
373 | }
|
---|
374 |
|
---|
375 | void region_layer_run(region_layer_t *rl, obj_info_t *obj_info)
|
---|
376 | {
|
---|
377 | forward_region_layer(rl);
|
---|
378 | get_region_boxes(rl, rl->output, rl->probs, rl->boxes);
|
---|
379 | do_nms_sort(rl, rl->boxes, rl->probs);
|
---|
380 | // region_layer_output(rl, obj_info);
|
---|
381 | }
|
---|
382 |
|
---|
383 | void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback)
|
---|
384 | {
|
---|
385 | uint32_t image_width = rl->image_width;
|
---|
386 | uint32_t image_height = rl->image_height;
|
---|
387 | float threshold = rl->threshold;
|
---|
388 | box_t *boxes = (box_t *)rl->boxes;
|
---|
389 |
|
---|
390 | for (int i = 0; i < rl->boxes_number; ++i)
|
---|
391 | {
|
---|
392 | int class = max_index(rl->probs[i], rl->classes);
|
---|
393 | float prob = rl->probs[i][class];
|
---|
394 |
|
---|
395 | if (prob > threshold)
|
---|
396 | {
|
---|
397 | box_t *b = boxes + i;
|
---|
398 | uint32_t x1 = b->x * image_width - (b->w * image_width / 2);
|
---|
399 | uint32_t y1 = b->y * image_height - (b->h * image_height / 2);
|
---|
400 | uint32_t x2 = b->x * image_width + (b->w * image_width / 2);
|
---|
401 | uint32_t y2 = b->y * image_height + (b->h * image_height / 2);
|
---|
402 | callback(x1, y1, x2, y2, class, prob);
|
---|
403 | }
|
---|
404 | }
|
---|
405 | }
|
---|