- Timestamp:
- Sep 14, 2020, 6:36:03 PM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
azure_iot_hub_riscv/trunk/app_iothub_client/kendryte/region_layer.c
r453 r458 6 6 typedef struct 7 7 { 8 9 10 11 8 float x; 9 float y; 10 float w; 11 float h; 12 12 } box_t; 13 13 14 14 typedef struct 15 15 { 16 17 18 16 int index; 17 int class; 18 float **probs; 19 19 } sortable_box_t; 20 20 … … 22 22 int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height) 23 23 { 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 24 int flag = 0; 25 26 rl->coords = 4; 27 rl->image_width = 320; 28 rl->image_height = 240; 29 30 rl->classes = channels / 5 - 5; 31 rl->net_width = origin_width; 32 rl->net_height = origin_height; 33 rl->layer_width = width; 34 rl->layer_height = height; 35 rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number); 36 rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1)); 37 38 rl->output = malloc(rl->output_number * sizeof(float)); 39 if (rl->output == NULL) 40 { 41 flag = -1; 42 goto malloc_error; 43 } 44 rl->boxes = malloc(rl->boxes_number * sizeof(box_t)); 45 if (rl->boxes == NULL) 46 { 47 flag = -2; 48 goto malloc_error; 49 } 50 rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float)); 51 if (rl->probs_buf == NULL) 52 { 53 flag = -3; 54 goto malloc_error; 55 } 56 rl->probs = malloc(rl->boxes_number * sizeof(float *)); 57 if (rl->probs == NULL) 58 { 59 flag = -4; 60 goto malloc_error; 61 } 62 for (uint32_t i = 0; i < rl->boxes_number; i++) 63 rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]); 64 return 0; 65 65 malloc_error: 66 67 68 69 70 66 free(rl->output); 67 free(rl->boxes); 68 free(rl->probs_buf); 69 free(rl->probs); 70 return flag; 71 71 } 72 72 73 73 void region_layer_deinit(region_layer_t *rl) 74 74 { 75 76 77 78 75 free(rl->output); 76 free(rl->boxes); 77 free(rl->probs_buf); 78 free(rl->probs); 79 79 } 80 80 81 81 static inline float sigmoid(float x) 82 82 { 83 83 return 1.f / (1.f + expf(-x)); 84 84 } 85 85 86 86 static void activate_array(region_layer_t *rl, int index, int n) 87 87 { 88 89 90 91 92 88 float *output = &rl->output[index]; 89 float *input = &rl->input[index]; 90 91 for (int i = 0; i < n; ++i) 92 output[i] = sigmoid(input[i]); 93 93 } 94 94 95 95 static int entry_index(region_layer_t *rl, int location, int entry) 96 96 { 97 98 99 100 101 97 int wh = rl->layer_width * rl->layer_height; 98 int n = location / wh; 99 int loc = location % wh; 100 101 return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc; 102 102 } 103 103 104 104 static void softmax(region_layer_t *rl, float *input, int n, int stride, float *output) 105 105 { 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 106 int i; 107 float diff; 108 float e; 109 float sum = 0; 110 float largest_i = input[0]; 111 112 for (i = 0; i < n; ++i) 113 { 114 if (input[i * stride] > largest_i) 115 largest_i = input[i * stride]; 116 } 117 118 for (i = 0; i < n; ++i) { 119 diff = input[i * stride] - largest_i; 120 e = expf(diff); 121 sum += e; 122 output[i * stride] = e; 123 } 124 for (i = 0; i < n; ++i) 125 output[i * stride] /= sum; 126 126 } 127 127 128 128 static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride, float *output) 129 129 { 130 131 132 133 134 135 130 int g, b; 131 132 for (b = 0; b < batch; ++b) { 133 for (g = 0; g < groups; ++g) 134 softmax(rl, input + b * batch_offset + g, n, stride, output + b * batch_offset + g); 135 } 136 136 } 137 137 138 138 static void forward_region_layer(region_layer_t *rl) 139 139 { 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 140 int index; 141 142 for (index = 0; index < rl->output_number; index++) 143 rl->output[index] = rl->input[index]; 144 145 for (int n = 0; n < rl->anchor_number; ++n) 146 { 147 index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0); 148 activate_array(rl, index, 2 * rl->layer_width * rl->layer_height); 149 index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4); 150 activate_array(rl, index, rl->layer_width * rl->layer_height); 151 } 152 153 index = entry_index(rl, 0, rl->coords + 1); 154 softmax_cpu(rl, rl->input + index, rl->classes, rl->anchor_number, 155 rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height, 156 rl->layer_width * rl->layer_height, rl->output + index); 157 157 } 158 158 159 159 static void correct_region_boxes(region_layer_t *rl, box_t *boxes) 160 160 { 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 161 uint32_t net_width = rl->net_width; 162 uint32_t net_height = rl->net_height; 163 uint32_t image_width = rl->image_width; 164 uint32_t image_height = rl->image_height; 165 uint32_t boxes_number = rl->boxes_number; 166 int new_w = 0; 167 int new_h = 0; 168 169 if (((float)net_width / image_width) < 170 ((float)net_height / image_height)) { 171 new_w = net_width; 172 new_h = (image_height * net_width) / image_width; 173 } else { 174 new_h = net_height; 175 new_w = (image_width * net_height) / image_height; 176 } 177 for (int i = 0; i < boxes_number; ++i) { 178 box_t b = boxes[i]; 179 180 b.x = (b.x - (net_width - new_w) / 2. / net_width) / 181 ((float)new_w / net_width); 182 b.y = (b.y - (net_height - new_h) / 2. / net_height) / 183 ((float)new_h / net_height); 184 b.w *= (float)net_width / new_w; 185 b.h *= (float)net_height / new_h; 186 boxes[i] = b; 187 } 188 188 } 189 189 190 190 static box_t get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) 191 191 { 192 193 194 195 196 197 198 192 volatile box_t b; 193 194 b.x = (i + x[index + 0 * stride]) / w; 195 b.y = (j + x[index + 1 * stride]) / h; 196 b.w = expf(x[index + 2 * stride]) * biases[2 * n] / w; 197 b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1] / h; 198 return b; 199 199 } 200 200 201 201 static void get_region_boxes(region_layer_t *rl, float *predictions, float **probs, box_t *boxes) 202 202 { 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 203 uint32_t layer_width = rl->layer_width; 204 uint32_t layer_height = rl->layer_height; 205 uint32_t anchor_number = rl->anchor_number; 206 uint32_t classes = rl->classes; 207 uint32_t coords = rl->coords; 208 float threshold = rl->threshold; 209 210 for (int i = 0; i < layer_width * layer_height; ++i) 211 { 212 int row = i / layer_width; 213 int col = i % layer_width; 214 215 for (int n = 0; n < anchor_number; ++n) 216 { 217 int index = n * layer_width * layer_height + i; 218 219 for (int j = 0; j < classes; ++j) 220 probs[index][j] = 0; 221 int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords); 222 int box_index = entry_index(rl, n * layer_width * layer_height + i, 0); 223 float scale = predictions[obj_index]; 224 225 boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row, 226 layer_width, layer_height, layer_width * layer_height); 227 228 float max = 0; 229 230 for (int j = 0; j < classes; ++j) 231 { 232 int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j); 233 float prob = scale * predictions[class_index]; 234 235 probs[index][j] = (prob > threshold) ? prob : 0; 236 if (prob > max) 237 max = prob; 238 } 239 probs[index][classes] = max; 240 } 241 } 242 correct_region_boxes(rl, boxes); 243 243 } 244 244 245 245 static int nms_comparator(void *pa, void *pb) 246 246 { 247 248 249 250 251 252 253 254 255 247 sortable_box_t a = *(sortable_box_t *)pa; 248 sortable_box_t b = *(sortable_box_t *)pb; 249 float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class]; 250 251 if (diff < 0) 252 return 1; 253 else if (diff > 0) 254 return -1; 255 return 0; 256 256 } 257 257 258 258 static float overlap(float x1, float w1, float x2, float w2) 259 259 { 260 261 262 263 264 265 266 267 260 float l1 = x1 - w1/2; 261 float l2 = x2 - w2/2; 262 float left = l1 > l2 ? l1 : l2; 263 float r1 = x1 + w1/2; 264 float r2 = x2 + w2/2; 265 float right = r1 < r2 ? r1 : r2; 266 267 return right - left; 268 268 } 269 269 270 270 static float box_intersection(box_t a, box_t b) 271 271 { 272 273 274 275 276 277 272 float w = overlap(a.x, a.w, b.x, b.w); 273 float h = overlap(a.y, a.h, b.y, b.h); 274 275 if (w < 0 || h < 0) 276 return 0; 277 return w * h; 278 278 } 279 279 280 280 static float box_union(box_t a, box_t b) 281 281 { 282 283 284 285 282 float i = box_intersection(a, b); 283 float u = a.w * a.h + b.w * b.h - i; 284 285 return u; 286 286 } 287 287 288 288 static float box_iou(box_t a, box_t b) 289 289 { 290 290 return box_intersection(a, b) / box_union(a, b); 291 291 } 292 292 293 293 static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs) 294 294 { 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 295 uint32_t boxes_number = rl->boxes_number; 296 uint32_t classes = rl->classes; 297 float nms_value = rl->nms_value; 298 int i, j, k; 299 sortable_box_t s[boxes_number]; 300 301 for (i = 0; i < boxes_number; ++i) 302 { 303 s[i].index = i; 304 s[i].class = 0; 305 s[i].probs = probs; 306 } 307 308 for (k = 0; k < classes; ++k) 309 { 310 for (i = 0; i < boxes_number; ++i) 311 s[i].class = k; 312 qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator); 313 for (i = 0; i < boxes_number; ++i) 314 { 315 if (probs[s[i].index][k] == 0) 316 continue; 317 box_t a = boxes[s[i].index]; 318 319 for (j = i + 1; j < boxes_number; ++j) 320 { 321 box_t b = boxes[s[j].index]; 322 323 if (box_iou(a, b) > nms_value) 324 probs[s[j].index][k] = 0; 325 } 326 } 327 } 328 328 } 329 329 330 330 static int max_index(float *a, int n) 331 331 { 332 333 334 335 336 337 338 339 340 341 342 343 332 int i, max_i = 0; 333 float max = a[0]; 334 335 for (i = 1; i < n; ++i) 336 { 337 if (a[i] > max) 338 { 339 max = a[i]; 340 max_i = i; 341 } 342 } 343 return max_i; 344 344 } 345 345 346 346 static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info) 347 347 { 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 348 uint32_t obj_number = 0; 349 uint32_t image_width = rl->image_width; 350 uint32_t image_height = rl->image_height; 351 uint32_t boxes_number = rl->boxes_number; 352 float threshold = rl->threshold; 353 box_t *boxes = (box_t *)rl->boxes; 354 355 for (int i = 0; i < rl->boxes_number; ++i) 356 { 357 int class = max_index(rl->probs[i], rl->classes); 358 float prob = rl->probs[i][class]; 359 360 if (prob > threshold) 361 { 362 box_t *b = boxes + i; 363 obj_info->obj[obj_number].x1 = b->x * image_width - (b->w * image_width / 2); 364 obj_info->obj[obj_number].y1 = b->y * image_height - (b->h * image_height / 2); 365 obj_info->obj[obj_number].x2 = b->x * image_width + (b->w * image_width / 2); 366 obj_info->obj[obj_number].y2 = b->y * image_height + (b->h * image_height / 2); 367 obj_info->obj[obj_number].class_id = class; 368 obj_info->obj[obj_number].prob = prob; 369 obj_number++; 370 } 371 } 372 obj_info->obj_number = obj_number; 373 373 } 374 374 375 375 void region_layer_run(region_layer_t *rl, obj_info_t *obj_info) 376 376 { 377 378 379 380 377 forward_region_layer(rl); 378 get_region_boxes(rl, rl->output, rl->probs, rl->boxes); 379 do_nms_sort(rl, rl->boxes, rl->probs); 380 // region_layer_output(rl, obj_info); 381 381 } 382 382 383 383 void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback) 384 384 { 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 } 385 uint32_t image_width = rl->image_width; 386 uint32_t image_height = rl->image_height; 387 float threshold = rl->threshold; 388 box_t *boxes = (box_t *)rl->boxes; 389 390 for (int i = 0; i < rl->boxes_number; ++i) 391 { 392 int class = max_index(rl->probs[i], rl->classes); 393 float prob = rl->probs[i][class]; 394 395 if (prob > threshold) 396 { 397 box_t *b = boxes + i; 398 uint32_t x1 = b->x * image_width - (b->w * image_width / 2); 399 uint32_t y1 = b->y * image_height - (b->h * image_height / 2); 400 uint32_t x2 = b->x * image_width + (b->w * image_width / 2); 401 uint32_t y2 = b->y * image_height + (b->h * image_height / 2); 402 callback(x1, y1, x2, y2, class, prob); 403 } 404 } 405 }
Note:
See TracChangeset
for help on using the changeset viewer.