source: azure_iot_hub_riscv/trunk/app_iothub_client/kendryte/kpu.h@ 458

Last change on this file since 458 was 458, checked in by coas-nagasima, 4 years ago

SPIとSerial、KPUの動作を改善

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-chdr;charset=UTF-8
File size: 12.8 KB
Line 
/* Copyright 2018 Canaan Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15#ifndef _KPU_H
16#define _KPU_H
17
#include <kendryte-k210.h>
#include <stddef.h>
#include <stdint.h>
#include "device.h"
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
/* Numeric constant 0x40000000; nothing in this header uses it directly. */
#define IOMEM 0x40000000

/*
 * DMA channel identifiers are plain `int` in this header.
 * This is a preprocessor alias, not a typedef: every later occurrence of the
 * name expands textually to `int`.
 */
#define dmac_channel_number_t int

/*
 * Callback that receives one opaque context pointer and returns an int.
 * NOTE(review): named for PLIC interrupt handlers; the meaning of the return
 * value is not visible here.
 */
typedef int (*plic_irq_callback_t)(void *ctx);
30
/*
 * Argument block for one layer: twelve consecutive 64-bit words.
 *
 * Every word is a union of the raw value (`reg`) and a bit-field view
 * (`data`). The bit-field widths of each word add up to exactly 64, so the
 * two views always cover the same storage.
 *
 * Member names, including the spelling `interrupt_enabe`, are part of the
 * public interface and are kept unchanged.
 */
typedef struct
{
    /* Word 0: four 1-bit flags followed by 60 reserved bits. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t int_en : 1;
            uint64_t ram_flag : 1;
            uint64_t full_add : 1;
            uint64_t depth_wise_layer : 1;
            uint64_t reserved : 60;
        } data;
    } interrupt_enabe;

    /* Word 1: two 15-bit addresses, each padded to 32 bits. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t image_src_addr : 15;
            uint64_t reserved0 : 17;
            uint64_t image_dst_addr : 15;
            uint64_t reserved1 : 17;
        } data;
    } image_addr;

    /* Word 2: three 10-bit channel counts. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t i_ch_num : 10;
            uint64_t reserved0 : 22;
            uint64_t o_ch_num : 10;
            uint64_t reserved1 : 6;
            uint64_t o_ch_num_coef : 10;
            uint64_t reserved2 : 6;
        } data;
    } image_channel_num;

    /* Word 3: input and output sizes, 10 + 9 bits each, padded to 32 bits. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t i_row_wid : 10;
            uint64_t i_col_high : 9;
            uint64_t reserved0 : 13;
            uint64_t o_row_wid : 10;
            uint64_t o_col_high : 9;
            uint64_t reserved1 : 13;
        } data;
    } image_size;

    /* Word 4: kernel/pool selectors in the first 32 bits, then a 32-bit address. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t kernel_type : 3;
            uint64_t pad_type : 1;
            uint64_t pool_type : 4;
            uint64_t first_stride : 1;
            uint64_t bypass_conv : 1;
            uint64_t load_para : 1;
            uint64_t reserved0 : 5;
            uint64_t dma_burst_size : 8;
            uint64_t pad_value : 8;
            uint64_t bwsx_base_addr : 32;
        } data;
    } kernel_pool_type_cfg;

    /* Word 5: load control in the first 32 bits, then a 32-bit start address. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t load_coor : 1;
            uint64_t load_time : 6;
            uint64_t reserved0 : 8;
            uint64_t para_size : 17;
            uint64_t para_start_addr : 32;
        } data;
    } kernel_load_cfg;

    /* Word 6: 4-bit column offset and 12-bit row offset, 48 reserved bits. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t coef_column_offset : 4;
            uint64_t coef_row_offset : 12;
            uint64_t reserved0 : 48;
        } data;
    } kernel_offset;

    /* Word 7: calculation settings in the first 32 bits, then a 32-bit address. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t channel_switch_addr : 15;
            uint64_t reserved : 1;
            uint64_t row_switch_addr : 4;
            uint64_t coef_size : 8;
            uint64_t coef_group : 3;
            uint64_t load_act : 1;
            uint64_t active_addr : 32;
        } data;
    } kernel_calc_type_cfg;

    /* Word 8: write-back settings in the first 23 bits, 41 reserved bits. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t wb_channel_switch_addr : 15;
            uint64_t reserved0 : 1;
            uint64_t wb_row_switch_addr : 4;
            uint64_t wb_group : 3;
            uint64_t reserved1 : 41;
        } data;
    } write_back_cfg;

    /* Word 9: two 4-bit shifts and two 24-bit arguments, 8 reserved bits. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t shr_w : 4;
            uint64_t shr_x : 4;
            uint64_t arg_w : 24;
            uint64_t arg_x : 24;
            uint64_t reserved0 : 8;
        } data;
    } conv_value;

    /* Word 10: one 40-bit argument, 24 reserved bits. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t arg_add : 40;
            uint64_t reserved : 24;
        } data;
    } conv_value2;

    /* Word 11: 1-bit flag, 16-bit per-channel byte count, 32-bit total byte count. */
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t send_data_out : 1;
            uint64_t reserved : 15;
            uint64_t channel_byte_num : 16;
            uint64_t dma_total_byte : 32;
        } data;
    } dma_parameter;
} kpu_layer_argument_t;
191
/*
 * Activation table: 16 parameter words followed by two bias words.
 *
 * Each `activate_para` entry packs 8 + 16 + 36 = 60 bits into one 64-bit
 * word. Each bias word overlays eight individual bytes on one 64-bit value.
 */
typedef struct
{
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t shift_number : 8;
            uint64_t y_mul : 16;
            uint64_t x_start : 36;
        } data;
    } activate_para[16];

    union
    {
        uint64_t reg;
        struct
        {
            uint8_t result_bias[8];
        } data;
    } activate_para_bias0;

    union
    {
        uint64_t reg;
        struct
        {
            uint8_t result_bias[8];
        } data;
    } activate_para_bias1;
} kpu_activate_table_t;
223
/*
 * One batch-normalisation argument word.
 * The bit-fields use 24 + 32 + 4 = 60 of the 64 bits visible through `reg`.
 */
typedef struct
{
    union
    {
        uint64_t reg;
        struct
        {
            uint64_t norm_mul : 24;
            uint64_t norm_add : 32;
            uint64_t norm_shift : 4;
        } data;
    } batchnorm;
} kpu_batchnorm_argument_t;
237
/*
 * Nine 16-bit weights overlaid with a 64-bit `reg`.
 *
 * The `data` view is 18 bytes while `reg` is 8 bytes, so `reg` aliases only
 * the first 8 bytes (the first four weights) of the union.
 */
typedef struct
{
    union
    {
        uint64_t reg;
        struct
        {
            uint16_t weight[9];
        } data;
    } weights;
} kpu_weights_kernel_16_3x3_t;
249
/* Interrupt bit layout: three 1-bit flags followed by 61 reserved bits. */
typedef struct
{
    uint64_t calc_done_int : 1;
    uint64_t layer_cfg_almost_empty_int : 1;
    uint64_t layer_cfg_almost_full_int : 1;
    uint64_t reserved : 61;
} kpu_config_interrupt_t;
257
/* FIFO thresholds: two 4-bit values followed by 56 reserved bits. */
typedef struct
{
    uint64_t fifo_full_threshold : 4;
    uint64_t fifo_empty_threshold : 4;
    uint64_t reserved : 56;
} kpu_config_fifo_threshold_t;
264
/* FIFO control: five 1-bit `*_flush_n` flags followed by 59 reserved bits. */
typedef struct
{
    uint64_t dma_fifo_flush_n : 1;
    uint64_t gs_fifo_flush_n : 1;
    uint64_t cfg_fifo_flush_n : 1;
    uint64_t cmd_fifo_flush_n : 1;
    uint64_t resp_fifo_flush_n : 1;
    uint64_t reserved : 59;
} kpu_config_fifo_ctrl_t;
274
/* One 1-bit mode flag followed by 63 reserved bits. */
typedef struct
{
    uint64_t eight_bit_mode : 1;
    uint64_t reserved : 63;
} kpu_config_eight_bit_mode_t;
280
281typedef struct
282{
283 volatile uint64_t layer_argument_fifo;
284
285 volatile union
286 {
287 uint64_t reg;
288 kpu_config_interrupt_t data;
289 } interrupt_status;
290
291 volatile union
292 {
293 uint64_t reg;
294 kpu_config_interrupt_t data;
295 } interrupt_raw;
296
297 volatile union
298 {
299 uint64_t reg;
300 kpu_config_interrupt_t data;
301 } interrupt_mask;
302
303 volatile union
304 {
305 uint64_t reg;
306 kpu_config_interrupt_t data;
307 } interrupt_clear;
308
309 volatile union
310 {
311 uint64_t reg;
312 kpu_config_fifo_threshold_t data;
313 } fifo_threshold;
314
315 volatile uint64_t fifo_data_out;
316
317 volatile union
318 {
319 uint64_t reg;
320 kpu_config_fifo_ctrl_t data;
321 } fifo_ctrl;
322
323 volatile union
324 {
325 uint64_t reg;
326 kpu_config_eight_bit_mode_t data;
327 } eight_bit_mode;
328} kpu_config_t;
329
/* Header record of seven consecutive 32-bit unsigned fields (28 bytes). */
typedef struct
{
    uint32_t version;
    uint32_t flags;
    uint32_t arch;
    uint32_t layers_length;
    uint32_t max_start_address;
    uint32_t main_mem_usage;
    uint32_t output_count;
} kpu_kmodel_header_t;
340
/* Header record of five consecutive 32-bit unsigned fields (20 bytes). */
typedef struct
{
    uint32_t version;
    uint32_t flags;
    uint32_t layers_length;
    uint32_t max_start_address;
    uint32_t layers_argument_start;
} kpu_model_header_t;
349
/* Describes one output as a 32-bit `address` and a 32-bit `size`. */
typedef struct
{
    uint32_t address;
    uint32_t size;
} kpu_model_output_t;
355
/*
 * Layer type codes.
 *
 * Values 0..25 form one contiguous range. The KL_K210_* codes form a second
 * range starting at 10240. Every value is written out explicitly so that
 * inserting a new enumerator cannot silently renumber the ones after it.
 */
typedef enum
{
    KL_INVALID = 0,
    KL_ADD = 1,
    KL_QUANTIZED_ADD = 2,
    KL_GLOBAL_MAX_POOL2D = 3,
    KL_QUANTIZED_GLOBAL_MAX_POOL2D = 4,
    KL_GLOBAL_AVERAGE_POOL2D = 5,
    KL_QUANTIZED_GLOBAL_AVERAGE_POOL2D = 6,
    KL_MAX_POOL2D = 7,
    KL_QUANTIZED_MAX_POOL2D = 8,
    KL_AVERAGE_POOL2D = 9,
    KL_QUANTIZED_AVERAGE_POOL2D = 10,
    KL_QUANTIZE = 11,
    KL_DEQUANTIZE = 12,
    KL_REQUANTIZE = 13,
    KL_L2_NORMALIZATION = 14,
    KL_SOFTMAX = 15,
    KL_CONCAT = 16,
    KL_QUANTIZED_CONCAT = 17,
    KL_FULLY_CONNECTED = 18,
    KL_QUANTIZED_FULLY_CONNECTED = 19,
    KL_TENSORFLOW_FLATTEN = 20,
    KL_QUANTIZED_TENSORFLOW_FLATTEN = 21,
    KL_RESIZE_NEAREST_NEIGHBOR = 22,
    KL_QUANTIZED_RESIZE_NEAREST_NEIGHBOR = 23,
    KL_CHANNELWISE_DEQUANTIZE = 24,
    KL_LOGISTIC = 25,
    KL_K210_CONV = 10240,
    KL_K210_ADD_PADDING = 10241,
    KL_K210_REMOVE_PADDING = 10242,
    KL_K210_UPLOAD = 10243
} kpu_model_layer_type_t;
389
/*
 * Per-layer header: a 32-bit `type` and a 32-bit `body_size`.
 * NOTE(review): `type` presumably holds a kpu_model_layer_type_t value and
 * `body_size` the byte length of the layer body; confirm against the loader.
 */
typedef struct
{
    uint32_t type;
    uint32_t body_size;
} kpu_model_layer_header_t;
395
/* Layer flag values: none (0) or KLF_MAIN_MEM_OUT (1). */
typedef enum
{
    KLF_NONE = 0,
    KLF_MAIN_MEM_OUT = 1
} kpu_model_layer_flags_t;
401
/* Padding mode codes: KLP_SAME (0) and KLP_VALID (1). */
typedef enum
{
    KLP_SAME = 0,
    KLP_VALID = 1
} kpu_model_padding_t;
407
/* Activation codes: KLA_LINEAR (0), KLA_RELU (1), KLA_RELU6 (2). */
typedef enum
{
    KLA_LINEAR = 0,
    KLA_RELU = 1,
    KLA_RELU6 = 2
} kpu_model_activation_t;
414
/* Quantisation parameter pair: a float `scale` followed by a float `bias`. */
typedef struct
{
    float scale;
    float bias;
} kpu_model_quant_param_t;
420
/* Shape triple stored as three 32-bit fields: width, height, channels. */
typedef struct
{
    uint32_t width;
    uint32_t height;
    uint32_t channels;
} kpu_model_shape_t;
427
/* Memory range stored as a 32-bit `start` and a 32-bit `size`. */
typedef struct
{
    uint32_t start;
    uint32_t size;
} kpu_model_memory_range_t;
433
/*
 * Argument record for a convolution layer: six 32-bit fields.
 * NOTE(review): the four `*_offset` fields are presumably byte offsets into
 * the model buffer; confirm against the loader.
 */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_out_address;
    uint32_t layer_offset;
    uint32_t weights_offset;
    uint32_t bn_offset;
    uint32_t act_offset;
} kpu_model_conv_layer_argument_t;
443
/* Argument record for an add layer: flags, two inputs, one output, and a count. */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_a_address;
    uint32_t main_mem_in_b_address;
    uint32_t main_mem_out_address;
    uint32_t count;
} kpu_model_add_layer_argument_t;
452
/*
 * Argument record for a quantised add layer.
 * Five unsigned 32-bit fields are followed by three signed
 * (offset, mul, shift) triples for input A, input B and the output.
 */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_a_address;
    uint32_t main_mem_in_b_address;
    uint32_t main_mem_out_address;
    uint32_t count;

    /* Input A */
    int32_t in_a_offset;
    int32_t in_a_mul;
    int32_t in_a_shift;

    /* Input B */
    int32_t in_b_offset;
    int32_t in_b_mul;
    int32_t in_b_shift;

    /* Output */
    int32_t out_offset;
    int32_t out_mul;
    int32_t out_shift;
} kpu_model_quant_add_layer_argument_t;
470
/* Argument record with five 32-bit fields: flags, in/out addresses, kernel size, channels. */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t main_mem_out_address;
    uint32_t kernel_size;
    uint32_t channels;
} kpu_model_gap2d_layer_argument_t;
479
480typedef struct
481{
482 uint32_t flags;
483 uint32_t main_mem_in_address;
484 uint32_t main_mem_out_address;
485 kpu_model_shape_t in_shape;
486 kpu_model_shape_t out_shape;
487 uint32_t kernel_width;
488 uint32_t kernel_height;
489 uint32_t stride_width;
490 uint32_t stride_height;
491 uint32_t padding_width;
492 uint32_t padding_height;
493} kpu_model_quant_max_pool2d_layer_argument_t;
494
495typedef struct
496{
497 uint32_t flags;
498 uint32_t main_mem_in_address;
499 uint32_t main_mem_out_address;
500 kpu_model_shape_t in_shape;
501 kpu_model_shape_t out_shape;
502 uint32_t kernel_width;
503 uint32_t kernel_height;
504 uint32_t stride_width;
505 uint32_t stride_height;
506 uint32_t padding_width;
507 uint32_t padding_height;
508 kpu_model_activation_t act;
509} kpu_model_ave_pool2d_layer_argument_t;
510
511typedef struct
512{
513 uint32_t flags;
514 uint32_t main_mem_in_address;
515 uint32_t mem_out_address;
516 uint32_t count;
517 kpu_model_quant_param_t quant_param;
518} kpu_model_quantize_layer_argument_t;
519
520typedef struct
521{
522 uint32_t flags;
523 uint32_t main_mem_in_address;
524 uint32_t main_mem_out_address;
525 uint32_t count;
526 kpu_model_quant_param_t quant_param;
527} kpu_model_dequantize_layer_argument_t;
528
/*
 * Argument record for a requantise layer: four 32-bit fields followed by a
 * 256-entry byte table (one entry per possible 8-bit value).
 */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t main_mem_out_address;
    uint32_t count;
    uint8_t table[256];
} kpu_model_requantize_layer_argument_t;
537
/*
 * Argument record for an add-padding layer.
 * The input address field is `main_mem_*`; the output address field is
 * `kpu_mem_*`.
 */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t kpu_mem_out_address;
    uint32_t channels;
} kpu_model_add_padding_layer_argument_t;
545
/* Argument record for a remove-padding layer: flags, in/out addresses, channel count. */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t main_mem_out_address;
    uint32_t channels;
} kpu_model_remove_padding_layer_argument_t;
553
/*
 * Argument record for an upload layer.
 * Holds flags, a `main_mem_*` input address, a `kpu_mem_*` output address,
 * and width, height and channels.
 */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t kpu_mem_out_address;
    uint32_t width;
    uint32_t height;
    uint32_t channels;
} kpu_model_upload_layer_argument_t;
563
/* Argument record for an L2-normalisation layer: flags, in/out addresses, channel count. */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t main_mem_out_address;
    uint32_t channels;
} kpu_model_l2_norm_layer_argument_t;
571
/* Argument record for a softmax layer: flags, in/out addresses, channel count. */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t main_mem_out_address;
    uint32_t channels;
} kpu_model_softmax_layer_argument_t;
579
580typedef struct
581{
582 uint32_t flags;
583 uint32_t main_mem_out_address;
584 uint32_t input_count;
585 kpu_model_memory_range_t inputs_mem[0];
586} kpu_model_concat_layer_argument_t;
587
588typedef struct
589{
590 uint32_t flags;
591 uint32_t main_mem_in_address;
592 uint32_t main_mem_out_address;
593 uint32_t in_channels;
594 uint32_t out_channels;
595 kpu_model_activation_t act;
596 float weights[0];
597} kpu_model_fully_connected_layer_argument_t;
598
599typedef struct
600{
601 uint32_t flags;
602 uint32_t main_mem_in_address;
603 uint32_t main_mem_out_address;
604 kpu_model_shape_t shape;
605} kpu_model_tf_flatten_layer_argument_t;
606
607typedef struct
608{
609 uint32_t flags;
610 uint32_t main_mem_in_address;
611 uint32_t main_mem_out_address;
612 kpu_model_shape_t in_shape;
613 uint32_t out_width;
614 uint32_t out_height;
615 uint32_t align_corners;
616} kpu_model_resize_nearest_neighbor_layer_argument_t;
617
618typedef struct
619{
620 uint32_t flags;
621 uint32_t main_mem_in_address;
622 uint32_t main_mem_out_address;
623 kpu_model_shape_t in_shape;
624 uint32_t out_width;
625 uint32_t out_height;
626 uint32_t align_corners;
627} kpu_model_quant_resize_nearest_neighbor_layer_argument_t;
628
629typedef struct
630{
631 uint32_t flags;
632 uint32_t main_mem_in_address;
633 uint32_t main_mem_out_address;
634 uint32_t channels;
635 uint32_t channel_size;
636 kpu_model_quant_param_t quant_params[0];
637} kpu_model_channelwise_dequant_argument_t;
638
/* Argument record for a logistic layer: flags, in/out addresses, channel count. */
typedef struct
{
    uint32_t flags;
    uint32_t main_mem_in_address;
    uint32_t main_mem_out_address;
    uint32_t channels;
} kpu_model_logistic_layer_argument_t;
646
/* Completion callback: receives one opaque `userdata` pointer and returns nothing. */
typedef void (*kpu_done_callback_t)(void *userdata);
648
649typedef struct
650{
651 union
652 {
653 struct
654 {
655 const uint8_t *model_buffer;
656 uint8_t *main_buffer;
657 uint32_t output_count;
658 const kpu_model_output_t *outputs;
659 const kpu_model_layer_header_t *layer_headers;
660 const uint8_t *body_start;
661 uint32_t layers_length;
662 volatile uint32_t current_layer;
663 const uint8_t *volatile current_body;
664 dmac_channel_number_t dma_ch;
665 kpu_done_callback_t done_callback;
666 void *userdata;
667 };
668
669 struct
670 {
671 void* nncase_ctx;
672 };
673 };
674} kpu_model_context_t;
675
/*
 * Per-layer metadata: three 32-bit offsets followed by four floats.
 * The member name `weigths_offset` (sic) is part of the public interface and
 * is kept as spelled.
 */
typedef struct
{
    uint32_t weigths_offset;
    uint32_t bn_offset;
    uint32_t act_offset;
    float input_scale;
    float input_bias;
    float output_scale;
    float output_bias;
} kpu_model_layer_metadata_t;
686
/* Quantisation parameter pair: a float `scale` followed by a float `bias`. */
typedef struct _quantize_param
{
    float scale;
    float bias;
} quantize_param_t;
692
693extern volatile kpu_config_t *const kpu;
694
695ER kpu_init(kpu_model_context_t *ctx);
696int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer);
697int kpu_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size);
698int kpu_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata);
699ER kpu_wait_done(kpu_model_context_t *ctx, TMO tmout);
700
701#ifdef __cplusplus
702}
703#endif
704
705#endif
Note: See TracBrowser for help on using the repository browser.