source: azure_iot_hub_riscv/trunk/app_iothub_client/kendryte/region_layer.c@ 458

Last change on this file since 458 was 458, checked in by coas-nagasima, 4 years ago

SPIとSerial、KPUの動作を改善

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 9.8 KB
RevLine 
[453]1#include <stdlib.h>
2#include <math.h>
3#include <stdio.h>
4#include "region_layer.h"
5
/*
 * Box described by a position (x, y) and a size (w, h).
 * overlap() in this file derives the edges as x -/+ w/2, i.e. the position
 * is used as the box centre.
 */
typedef struct
{
    float x, y; /* position */
    float w, h; /* size */
} box_t;
13
/* Sort record used by do_nms_sort() / nms_comparator(). */
typedef struct
{
    int index;     /* row of this box in the boxes and probs arrays */
    int class;     /* probability column the comparator looks at */
    float **probs; /* table of per-box probability rows */
} sortable_box_t;
20
21
22int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height)
23{
[458]24 int flag = 0;
[453]25
[458]26 rl->coords = 4;
27 rl->image_width = 320;
28 rl->image_height = 240;
[453]29
[458]30 rl->classes = channels / 5 - 5;
31 rl->net_width = origin_width;
32 rl->net_height = origin_height;
33 rl->layer_width = width;
34 rl->layer_height = height;
35 rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number);
36 rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1));
[453]37
[458]38 rl->output = malloc(rl->output_number * sizeof(float));
39 if (rl->output == NULL)
40 {
41 flag = -1;
42 goto malloc_error;
43 }
44 rl->boxes = malloc(rl->boxes_number * sizeof(box_t));
45 if (rl->boxes == NULL)
46 {
47 flag = -2;
48 goto malloc_error;
49 }
50 rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float));
51 if (rl->probs_buf == NULL)
52 {
53 flag = -3;
54 goto malloc_error;
55 }
56 rl->probs = malloc(rl->boxes_number * sizeof(float *));
57 if (rl->probs == NULL)
58 {
59 flag = -4;
60 goto malloc_error;
61 }
62 for (uint32_t i = 0; i < rl->boxes_number; i++)
63 rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]);
64 return 0;
[453]65malloc_error:
[458]66 free(rl->output);
67 free(rl->boxes);
68 free(rl->probs_buf);
69 free(rl->probs);
70 return flag;
[453]71}
72
73void region_layer_deinit(region_layer_t *rl)
74{
[458]75 free(rl->output);
76 free(rl->boxes);
77 free(rl->probs_buf);
78 free(rl->probs);
[453]79}
80
81static inline float sigmoid(float x)
82{
[458]83 return 1.f / (1.f + expf(-x));
[453]84}
85
86static void activate_array(region_layer_t *rl, int index, int n)
87{
[458]88 float *output = &rl->output[index];
89 float *input = &rl->input[index];
[453]90
[458]91 for (int i = 0; i < n; ++i)
92 output[i] = sigmoid(input[i]);
[453]93}
94
95static int entry_index(region_layer_t *rl, int location, int entry)
96{
[458]97 int wh = rl->layer_width * rl->layer_height;
98 int n = location / wh;
99 int loc = location % wh;
[453]100
[458]101 return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc;
[453]102}
103
104static void softmax(region_layer_t *rl, float *input, int n, int stride, float *output)
105{
[458]106 int i;
107 float diff;
108 float e;
109 float sum = 0;
110 float largest_i = input[0];
[453]111
[458]112 for (i = 0; i < n; ++i)
113 {
114 if (input[i * stride] > largest_i)
115 largest_i = input[i * stride];
116 }
[453]117
[458]118 for (i = 0; i < n; ++i) {
119 diff = input[i * stride] - largest_i;
120 e = expf(diff);
121 sum += e;
122 output[i * stride] = e;
123 }
124 for (i = 0; i < n; ++i)
125 output[i * stride] /= sum;
[453]126}
127
128static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride, float *output)
129{
[458]130 int g, b;
[453]131
[458]132 for (b = 0; b < batch; ++b) {
133 for (g = 0; g < groups; ++g)
134 softmax(rl, input + b * batch_offset + g, n, stride, output + b * batch_offset + g);
135 }
[453]136}
137
138static void forward_region_layer(region_layer_t *rl)
139{
[458]140 int index;
[453]141
[458]142 for (index = 0; index < rl->output_number; index++)
143 rl->output[index] = rl->input[index];
[453]144
[458]145 for (int n = 0; n < rl->anchor_number; ++n)
146 {
147 index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0);
148 activate_array(rl, index, 2 * rl->layer_width * rl->layer_height);
149 index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4);
150 activate_array(rl, index, rl->layer_width * rl->layer_height);
151 }
[453]152
[458]153 index = entry_index(rl, 0, rl->coords + 1);
154 softmax_cpu(rl, rl->input + index, rl->classes, rl->anchor_number,
155 rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height,
156 rl->layer_width * rl->layer_height, rl->output + index);
[453]157}
158
159static void correct_region_boxes(region_layer_t *rl, box_t *boxes)
160{
[458]161 uint32_t net_width = rl->net_width;
162 uint32_t net_height = rl->net_height;
163 uint32_t image_width = rl->image_width;
164 uint32_t image_height = rl->image_height;
165 uint32_t boxes_number = rl->boxes_number;
166 int new_w = 0;
167 int new_h = 0;
[453]168
[458]169 if (((float)net_width / image_width) <
170 ((float)net_height / image_height)) {
171 new_w = net_width;
172 new_h = (image_height * net_width) / image_width;
173 } else {
174 new_h = net_height;
175 new_w = (image_width * net_height) / image_height;
176 }
177 for (int i = 0; i < boxes_number; ++i) {
178 box_t b = boxes[i];
[453]179
[458]180 b.x = (b.x - (net_width - new_w) / 2. / net_width) /
181 ((float)new_w / net_width);
182 b.y = (b.y - (net_height - new_h) / 2. / net_height) /
183 ((float)new_h / net_height);
184 b.w *= (float)net_width / new_w;
185 b.h *= (float)net_height / new_h;
186 boxes[i] = b;
187 }
[453]188}
189
190static box_t get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
191{
[458]192 volatile box_t b;
[453]193
[458]194 b.x = (i + x[index + 0 * stride]) / w;
195 b.y = (j + x[index + 1 * stride]) / h;
196 b.w = expf(x[index + 2 * stride]) * biases[2 * n] / w;
197 b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1] / h;
198 return b;
[453]199}
200
201static void get_region_boxes(region_layer_t *rl, float *predictions, float **probs, box_t *boxes)
202{
[458]203 uint32_t layer_width = rl->layer_width;
204 uint32_t layer_height = rl->layer_height;
205 uint32_t anchor_number = rl->anchor_number;
206 uint32_t classes = rl->classes;
207 uint32_t coords = rl->coords;
208 float threshold = rl->threshold;
[453]209
[458]210 for (int i = 0; i < layer_width * layer_height; ++i)
211 {
212 int row = i / layer_width;
213 int col = i % layer_width;
[453]214
[458]215 for (int n = 0; n < anchor_number; ++n)
216 {
217 int index = n * layer_width * layer_height + i;
[453]218
[458]219 for (int j = 0; j < classes; ++j)
220 probs[index][j] = 0;
221 int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords);
222 int box_index = entry_index(rl, n * layer_width * layer_height + i, 0);
223 float scale = predictions[obj_index];
[453]224
[458]225 boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row,
226 layer_width, layer_height, layer_width * layer_height);
[453]227
[458]228 float max = 0;
[453]229
[458]230 for (int j = 0; j < classes; ++j)
231 {
232 int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j);
233 float prob = scale * predictions[class_index];
[453]234
[458]235 probs[index][j] = (prob > threshold) ? prob : 0;
236 if (prob > max)
237 max = prob;
238 }
239 probs[index][classes] = max;
240 }
241 }
242 correct_region_boxes(rl, boxes);
[453]243}
244
245static int nms_comparator(void *pa, void *pb)
246{
[458]247 sortable_box_t a = *(sortable_box_t *)pa;
248 sortable_box_t b = *(sortable_box_t *)pb;
249 float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class];
[453]250
[458]251 if (diff < 0)
252 return 1;
253 else if (diff > 0)
254 return -1;
255 return 0;
[453]256}
257
258static float overlap(float x1, float w1, float x2, float w2)
259{
[458]260 float l1 = x1 - w1/2;
261 float l2 = x2 - w2/2;
262 float left = l1 > l2 ? l1 : l2;
263 float r1 = x1 + w1/2;
264 float r2 = x2 + w2/2;
265 float right = r1 < r2 ? r1 : r2;
[453]266
[458]267 return right - left;
[453]268}
269
270static float box_intersection(box_t a, box_t b)
271{
[458]272 float w = overlap(a.x, a.w, b.x, b.w);
273 float h = overlap(a.y, a.h, b.y, b.h);
[453]274
[458]275 if (w < 0 || h < 0)
276 return 0;
277 return w * h;
[453]278}
279
280static float box_union(box_t a, box_t b)
281{
[458]282 float i = box_intersection(a, b);
283 float u = a.w * a.h + b.w * b.h - i;
[453]284
[458]285 return u;
[453]286}
287
288static float box_iou(box_t a, box_t b)
289{
[458]290 return box_intersection(a, b) / box_union(a, b);
[453]291}
292
293static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs)
294{
[458]295 uint32_t boxes_number = rl->boxes_number;
296 uint32_t classes = rl->classes;
297 float nms_value = rl->nms_value;
298 int i, j, k;
299 sortable_box_t s[boxes_number];
[453]300
[458]301 for (i = 0; i < boxes_number; ++i)
302 {
303 s[i].index = i;
304 s[i].class = 0;
305 s[i].probs = probs;
306 }
[453]307
[458]308 for (k = 0; k < classes; ++k)
309 {
310 for (i = 0; i < boxes_number; ++i)
311 s[i].class = k;
312 qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator);
313 for (i = 0; i < boxes_number; ++i)
314 {
315 if (probs[s[i].index][k] == 0)
316 continue;
317 box_t a = boxes[s[i].index];
[453]318
[458]319 for (j = i + 1; j < boxes_number; ++j)
320 {
321 box_t b = boxes[s[j].index];
[453]322
[458]323 if (box_iou(a, b) > nms_value)
324 probs[s[j].index][k] = 0;
325 }
326 }
327 }
[453]328}
329
330static int max_index(float *a, int n)
331{
[458]332 int i, max_i = 0;
333 float max = a[0];
[453]334
[458]335 for (i = 1; i < n; ++i)
336 {
337 if (a[i] > max)
338 {
339 max = a[i];
340 max_i = i;
341 }
342 }
343 return max_i;
[453]344}
345
346static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info)
347{
[458]348 uint32_t obj_number = 0;
349 uint32_t image_width = rl->image_width;
350 uint32_t image_height = rl->image_height;
351 uint32_t boxes_number = rl->boxes_number;
352 float threshold = rl->threshold;
353 box_t *boxes = (box_t *)rl->boxes;
354
355 for (int i = 0; i < rl->boxes_number; ++i)
356 {
357 int class = max_index(rl->probs[i], rl->classes);
358 float prob = rl->probs[i][class];
[453]359
[458]360 if (prob > threshold)
361 {
362 box_t *b = boxes + i;
363 obj_info->obj[obj_number].x1 = b->x * image_width - (b->w * image_width / 2);
364 obj_info->obj[obj_number].y1 = b->y * image_height - (b->h * image_height / 2);
365 obj_info->obj[obj_number].x2 = b->x * image_width + (b->w * image_width / 2);
366 obj_info->obj[obj_number].y2 = b->y * image_height + (b->h * image_height / 2);
367 obj_info->obj[obj_number].class_id = class;
368 obj_info->obj[obj_number].prob = prob;
369 obj_number++;
370 }
371 }
372 obj_info->obj_number = obj_number;
[453]373}
374
375void region_layer_run(region_layer_t *rl, obj_info_t *obj_info)
376{
[458]377 forward_region_layer(rl);
378 get_region_boxes(rl, rl->output, rl->probs, rl->boxes);
379 do_nms_sort(rl, rl->boxes, rl->probs);
380 // region_layer_output(rl, obj_info);
[453]381}
382
383void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback)
384{
[458]385 uint32_t image_width = rl->image_width;
386 uint32_t image_height = rl->image_height;
387 float threshold = rl->threshold;
388 box_t *boxes = (box_t *)rl->boxes;
[453]389
[458]390 for (int i = 0; i < rl->boxes_number; ++i)
391 {
392 int class = max_index(rl->probs[i], rl->classes);
393 float prob = rl->probs[i][class];
[453]394
[458]395 if (prob > threshold)
396 {
397 box_t *b = boxes + i;
398 uint32_t x1 = b->x * image_width - (b->w * image_width / 2);
399 uint32_t y1 = b->y * image_height - (b->h * image_height / 2);
400 uint32_t x2 = b->x * image_width + (b->w * image_width / 2);
401 uint32_t y2 = b->y * image_height + (b->h * image_height / 2);
402 callback(x1, y1, x2, y2, class, prob);
403 }
404 }
[453]405}
Note: See TracBrowser for help on using the repository browser.