## Text File
## FILE: davinci_mini.config
## DESC: Hardware specification for DaVinci-Mini
## CREATED: 2018-07-26 00:19:48
## MODIFIED: 2018-07-26 00:19:48
## --------------------------------------------------------------
##
## Layout notes for maintainers:
##   * Shared settings are pulled in from common.spec, located in the same
##     directory as this file (see the @include below).
##   * Everything chip-specific lives in the single top-level `davinci` scope;
##     each hardware unit has its own nested scope.
##   * Every scope in this file is terminated with `};`.
##   * A value written without quotes (e.g. `icache_port`) is a reference to a
##     previously assigned variable, so the order of assignments matters.
##   * Comments of the form "N/8" record a width in bits divided by 8 to give
##     bytes. Where such a comment disagrees with the configured value it is
##     marked NOTE(review) instead of being silently "corrected".

@include fileToDir( configFile( ) ) + "/common.spec";

davinci {
    chip_version      = "mini";
    platform_type     = "DVC_PLATFORM_LINUX";
    platform_smmu_on  = "false";
    l2_size           = "16 MB";
    system_cache_size = "0 MB";
    isa_tag           = "7.2";

    # -------------------------------------------------------------------
    # BIU Specifications
    # -------------------------------------------------------------------
    biu {
        port_num    = "3";
        icache_port = "0";
        lsu_port    = "2";
        dcache_port = "1";

        read_queue_size  = "128";
        write_queue_size = "64";
        total_queue_size = "192";   # equals read_queue_size + write_queue_size

        # Priority ports are expressed as references to the port ids above.
        high_priority_read_port  = icache_port;
        high_priority_write_port = dcache_port;
        enable_memmap            = "true";

        ipu_port_width       = "128 B";   # 1024/8
        mte_data_size        = "512 B";
        mte_port_width       = "128 B";   # NOTE(review): comment used to say 512/8 (= 64 B); value is 128 B -- confirm
        mte_read_port_num    = "2";
        mte_write_port_num   = "1";
        bus_port_width       = "128 B";   # NOTE(review): comment used to say 512/8 (= 64 B); value is 128 B -- confirm
        read_bus_port_width  = "256 B";
        write_bus_port_width = "128 B";
        bus_read_port_num    = "1";
        bus_write_port_num   = "1";

        write_buffer_depth      = "4";
        read_buffer_depth       = "4";
        ifu_read_queue_size     = "8";
        mem_entry_num           = "64";
        physical_mem_entry_num  = "256";
        dcache_port_width       = "128";
        dcache_read_queue_size  = "4";
        dcache_write_queue_size = "4";
        addr_interleave_unit    = "512";
        # `mem_entry_size' will be calculated as l2_size/mem_entry_num

        is_chi_bus_inf     = "0";
        biu_esl_req_switch = "0";
    };

    # -------------------------------------------------------------------
    # buffer Specifications
    # -------------------------------------------------------------------
    buffer {
        aic_buffer_ini_value = "0x5a";   ## xiaoziming: set initial value for L0A/B/C/1/UB
    };

    # -------------------------------------------------------------------
    # DDR Specifications
    # -------------------------------------------------------------------
    ddr {
        min_read_latency    = "226";
        read_latency_diver  = "180";
        min_write_latency   = "120";
        write_latency_diver = "180";
        bandwidth_limit     = "17";
        max_credit_num      = "4096";
    };

    # -------------------------------------------------------------------
    # L2 Specifications
    # -------------------------------------------------------------------
    l2 {
        bandwidth_limit       = "114";   # read + write bandwidth, 96 B/ns
        read_bandwidth_limit  = "114";
        write_bandwidth_limit = "114";
        max_credit_num        = "4096";
        min_read_latency      = "38";    # 50ns: This number from Xu Weijia, according to Wu Xiaowen this delay is 23 tick
        read_latency_diver    = "4";
        min_write_latency     = "135";   # 180ns: This number from Xu Weijia, according to Wu Xiaowen this delay is 26 tick
        write_latency_diver   = "4";
    };

    # -------------------------------------------------------------------
    # System cache Specifications
    # -------------------------------------------------------------------
    sys {
        base_addr             = "16 MB";
        bandwidth_limit       = "128";
        read_bandwidth_limit  = "32";
        write_bandwidth_limit = "32";
        max_credit_num        = "4096";
        min_read_latency      = "100";
        read_latency_diver    = "20";
        min_write_latency     = "100";
        write_latency_diver   = "20";
    };

    # -------------------------------------------------------------------
    # PSQ Specifications
    # -------------------------------------------------------------------
    psq {
        buffer_size   = "32";     # instr's
        fetch_size    = "16 B";   # 4 * IAS_MAX_BYTES
        dispatch_size = "2";      # instr's
    };

    # -------------------------------------------------------------------
    # Issue Queue Specifications
    # -------------------------------------------------------------------
    issue_queue {
        size             = "32";
        ostd_num         = "32";
        event_queue_size = "32";
        event_queue_num  = "6";

        cube_issue_queue_size     = "32";
        cube_issue_queue_ostd_num = "15";

        vec_issue_queue_num      = "2";
        vec_issue_ib_enable      = "1";   # instruction buffer
        vec_issue_ib_size        = "4";
        vec_issue_queue_size     = "128";
        vec_issue_queue_ostd_num = "31";
        vec_issue_queue_conflict = "0";

        mte1_issue_queue_size     = "32";
        mte1_issue_queue_ostd_num = "31";
        mte2_issue_queue_size     = "32";
        mte2_issue_queue_ostd_num = "31";
        mte3_issue_queue_size     = "32";
        mte3_issue_queue_ostd_num = "31";
        mte4_issue_queue_size     = "4";
        mte4_issue_queue_ostd_num = "31";
        mte5_issue_queue_size     = "4";
        mte5_issue_queue_ostd_num = "31";

        scalar_issue_queue_size = "1";
    };

    # -------------------------------------------------------------------
    # Vector
    # -------------------------------------------------------------------
    # In this scope each comment describes the setting on the line below it.
    vector {
        scatter_ports = "8";

        # new version function switch, lite_es:0, lite_cs:1, Ascend610:1
        dma_version = "1";

        # vec double: lite_es:0, lite_cs:0, Ascend610:1
        double_ctrl = "1";

        # UB RD WR ctrl: 0: rd and wr merge; 1: rd and wr separate
        ub_rd_wr_mode = "0";

        # Ascend610 instr fusion: lite_es:0, lite_cs:0, Ascend610:1
        instr_fusion = "1";

        # for lite cs and Ascend610 new change: lite_es:0, lite_cs:1, Ascend610:1
        new_version = "1";

        # for VCMAX, VCMIN MAX_MIN_CNT: 0 - Ascend610 proc as RTL; 1 - baltimore cs proc as ISA
        max_min_cnt_ctrl = "0";

        # vcmpv_ctrl=0: model mode; vcmpv_ctrl=1: RTL mode
        vcmpv_ctrl = "0";

        # fp16_inf_proc: for fp16_t inf proc, open when baltimore cs proc
        fp16_inf_proc = "1";
    };   # terminator normalized to `};` to match every other scope in this file

    # -------------------------------------------------------------------
    # Unified Buffer Specifications
    # -------------------------------------------------------------------
    ub {
        addr_shift        = "256 KB";
        buffer_size       = "256 KB";
        buffer_line_size  = "32 B";   # 256/8 B
        buffer_bank_count = "64";
        # buffer_bank_size can be calculated as buffer_size/buffer_bank_count

        vector_access_width = davinci.ub.buffer_line_size;   # vector access width is always equal to L0_UB_BUFFER_LINE_SIZE
        scalar_access_width = davinci.ub.buffer_line_size;   # scalar access width is always equal to L0_UB_BUFFER_LINE_SIZE
        lsu_write_port_num  = "2";                           # l0_ub_lsu_wr_port_width always equals 2*L0_UB_BUFFER_LINE_SIZE
        scalar_rd_ub_latency = "7";
    };

    # -------------------------------------------------------------------
    # L1 Buffer Specifications
    # -------------------------------------------------------------------
    l1 {
        buffer_size       = "1 MB";
        buffer_bg_num     = "16";
        buffer_line_size  = "32 B";   # NOTE(review): comment used to say 128/8 (= 16 B); value is 32 B -- confirm
        buffer_bank_count = "16";
        # buffer_bank_size can be calculated from buffer_line_size/buffer_bank_count;
        # NOTE(review): the ub scope derives it from buffer_size/buffer_bank_count --
        # presumably the same is meant here; confirm.
        core_access_width   = "512";  # 4096/8
        round_robbin_switch = "0";
        bank_cnt_per_bg     = "2";
        l1_ecc_en           = "1";
    };

    # -------------------------------------------------------------------
    # L0 Specifications
    # -------------------------------------------------------------------
    l0 {
        ab_buffer_size = "64 KB";
        a_buffer_size  = "64 KB";
        b_buffer_size  = "64 KB";

        ab_buffer_line_size = "512 B";   # 4096/8, no bank conflict in L0AB; don't care too much
        a_buffer_line_size  = "512 B";   # 4096/8, no bank conflict in L0AB; don't care too much
        b_buffer_line_size  = "128 B";   # no bank conflict in L0AB; don't care too much
                                         # NOTE(review): comment used to say 2048/8 (= 256 B); value is 128 B -- confirm
        ab_buffer_bank_count = "32";
        # ab_buffer_bank_size = l0_ab_buffer_size/l0_ab_buffer_bank_count

        ab_cube_access_width = "512 B";  # 4096/8, no bank conflict in L0AB; don't care too much
        ab_lsu_access_width  = "512 B";  # 4096/8, no bank conflict in L0AB; don't care too much

        a_buffer_auto_pingpong_en = "0"; # enable l0a automatically pingpong
        b_buffer_auto_pingpong_en = "0"; # enable l0b automatically pingpong

        # l0-c specifications
        c_buffer_size             = "256 KB";
        c_buffer_line_size        = "32 B";   # 256/8
        c_buffer_bank_count       = "32";
        c_buffer_auto_pingpong_en = "0";      # enable l0c automatically pingpong
        # c bank_size = l0_c_buffer_size/l0_c_buffer_count, which can be calculated on demand.

        c_cube_access_width         = "1024 B";
        c_vector_write_access_width = "256 B";
        c_vector_read_access_width  = "128 B";
        c_buffer_line_size_ca       = "32 B";   # 256/8
        c_buffer_bank_count_ca      = "16";
    };

    # -------------------------------------------------------------------
    # Scalar Buffer Specifications
    # -------------------------------------------------------------------
    scalar_buffer {
        base_address            = "0x40000";   # 256K
        size                    = "16 KB";
        ld_instr_ostd           = "0x6";
        st_instr_ostd           = "0x6";
        ld_st_separate_req_port = "true";
        ld_st_hazard_check_unit = "32";
        scalar_buffer_ecc_en    = "1";
    };

    # -------------------------------------------------------------------
    # CA MTE Specifications
    # -------------------------------------------------------------------
    mte {
        l1_read_bus_width      = "512 B";   # 4096/8
        l12l0c_max_bus_width   = "128 B";
        biu_write_bus_width    = "64 B";    # 512/8
        unibuf_write_bus_width = "128 B";   # 2*512/8
        unibuf_read_bus_width  = "256 B";
        unibuf_trans_size      = "32 B";
        unzip_engine_num       = "4";

        l0_write_delay     = "1";
        l1_read_delay      = "8";
        l1_write_delay     = "6";
        unibuf_write_delay = "1";

        # winograd
        winograd_l0a_bus_width = "256 B";
        winograd_l0b_bus_width = "128 B";
        winograd_c0_size       = "8";

        # depthwise
        read_colum_num      = "16";
        depthwise_c0_size   = "16 B";
        data_fifo_num       = "4 B";   # NOTE(review): a "_num" setting carrying a byte suffix -- confirm the unit is intended
        max_crdt_fifo_depth = "1";

        # biu
        biu_mte_rob_slot_num   = "128";
        biu_mte_max_trans_size = "512";
        # biu_mte_rob_size = biu_mte_rob_slot_num * biu_mte_max_trans_size, can always be calculated on demand
        biu_mte_read_tags      = "128";
        biu_mte_write_tags     = "64";
        biu_mte_read_ostd      = "128";
        biu_mte_write_ostd     = "64";
        default_repeat_time    = "1";
        max_biu_if_trans_size  = biu_mte_max_trans_size;   # reference to the setting above
        max_rd_tagfifo_size    = "200";
        lsu_biu_align_size     = biu_write_bus_width;      # reference to the setting above
        biu_cmdgen_delay       = "1";
        reoder_buffer_switch   = "1";   ## 0 for round robin; 1 for front find; 2 for find tagid which came back from ddr

        # command scheduler
        max_crdt_ue_3d        = "32";
        max_crdt_l1_ue_2d     = "32";
        max_crdt_biu_ue_2d    = "32";
        max_crdt_l1_ue_dma    = "32";
        max_crdt_biu_ue_dma   = "32";
        max_crdt_ub_ue_dma    = "32";
        max_crdt_ue_unzip     = "32";
        max_crdt_ue_fmc       = "32";
        max_crdt_ue_fmd       = "32";
        max_crdt_ue_aipp      = "32";
        max_crdt_ue_ksparse   = "32";
        max_crdt_ue_winograd  = "32";   ## FIXME: liujianan.
        max_crdt_ue_depthwise = "32";   ## FIXME: liujianan.
        cmd_iq_arb_delay_time       = "1";
        cmd_arb_dispatch_delay_time = "3";

        # rob read & write latency
        l1_read_rob_delay     = "6";
        ub_read_rob_delay     = "6";
        l0_read_rob_delay     = "6";
        uzp_read_rob_delay    = "6";
        fmd_read_rob_delay    = "6";
        write_rob_delay       = "0";
        ktable_read_rob_delay = "6";

        # mte fmcd
        fmc_pad_value     = "0x0";
        fmcd_engine_num   = "1";
        fmcd_head_size    = "32 B";
        fmcd_mask_size    = "16 B";
        fmcd_ubread_width = "128 B";   # 128B
        fmc_wdata_size    = "512 B";
        # fmc_seg_size (FMCD_UBREAD_WIDTH/FMCD_ENGINE_NUM) //32B
        # NOTE(review): with the values above that quotient is 128 B / 1 = 128 B,
        # which matches the setting below; the "32B" remark looks stale -- confirm.
        fmcd_seg_size     = "128 B";
        cube_size         = "16 B";
        # fm_size = cube_size_*cube_size_ * 2;
        fmc_write_bus_width  = "64 B";
        max_fmc_uop_crdt     = "3";
        max_fmc_wheader_num  = "4";
        max_fetch_head_num   = "64";
        fmd_mte_wdata_delay  = "3";
        fmcd_header_len_unit = "64";
        fmcd_interleave_size = "512 B";
        fmc_rdata_fifo_depth = "8";

        # aipp
        min_h_res            = "8";
        max_h_res            = "4096";
        byte_per_pixel_in_l1 = "32";
        y_dat_buf_size       = "64";
        uv_dat_buf_size      = "64";
        rgb_dat_buf_size     = "128";
        uv_upsample_buf_size = "4096";   # 512bx64
        sync_buf_size        = "96";
        csc_buf_size         = "24";
        dtc_buf_size         = "48";
        cpadding_buf_size    = "256";
        pixels_per_trans     = "8";
        img_dat_channels     = "3";
        aipp_dat_buf_bubble  = "3";
        aipp_max_dtc_lat     = "5";
        aipp_dtc_u8_fp16_lat = "5";
        aipp_dtc_u8_s8_lat   = "1";
        pixels_per_batch     = "8";
        dma_uc_rd_size       = "512";
        dma_dc_rd_size       = "512";
        dma_buf_size_dc      = "512";
        dma_buf_size_uc      = "512";
        dma_pl0_ping_buf_addr_uc = "0x0";
        dma_pl0_pong_buf_addr_uc = "0x200";
        dma_pl1_ping_buf_addr_dc = "0x0";
        dma_pl1_pong_buf_addr_dc = "0x200";
        dma_pl0_ping_buf_addr_dc = "0x400";
        dma_pl0_pong_buf_addr_dc = "0x800";   # NOTE(review): the other ping/pong pairs are 0x200 apart; this pair is 0x400 apart -- confirm
        aipp_input_buf_bandwidth = "128 B";

        # img2col
        img2col_c0_size = "16";

        # l1 interface
        max_l1_uop_crdt  = "5";
        l1_to_l0_delay   = "3";
        l1_to_ub_delay   = "4";
        l1_to_ub_delay_2 = "2";

        # ub interface
        max_ub_uop_crdt    = "3";
        ub_to_l1_req_delay = "2";
        rtl_delay          = "1";

        # unzip uop
        unzip_fm_size              = "512";
        unzip_pkt_size             = "8 B";
        unzip_head_size            = "8 B";
        unzip_dict_size            = "34 B";
        unzip_low_sparse_dict_size = "36 B";
        unzip_str_dict_size        = "1 B";
        unzip_out_len              = "64 B";
        unzip_seg_size             = "32 B";
        unzip_buffer_size          = "32768";
        unzip_entry_size           = "2";
        max_fetch_idx_num          = "64";
        max_uzp_uop_crdt           = "3";
        unzip_delay_time           = "4";
        unzip_bypass_delay_time    = "2";
        unzip_buffer_depth         = "2";
        unzip_write_band_width     = "256 B";

        # uop
        ue_2d_delay    = "1";
        ue_3d_delay    = "11";
        ue_dma_delay   = "1";
        ue_smask_delay = "1";

        # biu to buffer bus width
        biu_to_l1_bus_width  = "128 B";   # 1024/8
        biu_to_l0a_bus_width = "256 B";   # NOTE(review): comment used to say 1024/8 (= 128 B); value is 256 B -- confirm
        biu_to_l0b_bus_width = "128 B";   # 1024/8
        biu_to_ub_bus_width  = "128 B";   # 1024/8
        biu_to_fmd_bus_width = "128 B";

        # ub to buffer bus width
        ub_to_biu_bus_width   = "128 B";
        ub_to_l1_bus_width    = "256 B";
        ub_to_smask_bus_width = "128 B";

        # l1 to buffer bus width
        l1_to_l0a_bus_width = "512 B";
        l1_to_l0b_bus_width = "128 B";
        l1_to_l0c_bus_width = "128 B";
        l1_to_brc_bus_width = "1024 B";
        l1_to_ub_bus_width  = "128 B";
        depthwise_bus_width = "864 B";

        # fmd
        fmd_to_l1_bus_width = "128 B";
        fmd_to_ub_bus_width = "128 B";

        # new version function switch
        dma_scatter_mode       = "1";
        winograd_new_version   = "1";
        depthwise_early_start  = "1";
        l1_3d_size_round       = "1";
        load_3d_v2_new_version = "1";
        aipp_bandwidth_limit   = "0";
        dma_batch_mode         = "0";

        # set2d
        set2d_l1_bandwidth  = "256 B";
        set2d_l0a_bandwidth = "512 B";
        set2d_l0b_bandwidth = "128 B";
        layout_mode         = "0";
    };

    # -------------------------------------------------------------------
    # CA Cube Specifications
    # -------------------------------------------------------------------
    cube {
        cube_dummy_cycle_number      = "0";
        cube_spec_npe                = "256";
        cube_spec_cube_size          = "16";
        global_sync_pulse_phase_type = "1";
        vdrop_tick                   = "48";   ## from Guo Zhenyi
        m_frac_size                  = "16";
        n_frac_size                  = "16";
        support_small_channel_       = "0";    # NOTE(review): key name ends in '_' -- left as is; confirm it matches what the reader looks up
        early_trigger_set_flag       = "0";
        wino_cs_en                   = "1";
        dp_conv_cs_en                = "1";
        mmad_offset_cs_en            = "1";
        inf_nan_tr_version_en        = "1";
    };

    # -------------------------------------------------------------------
    # dump
    # -------------------------------------------------------------------
    # Level encoding used by the three *_level settings:
    #   trace = 0, debug = 1, info = 2, warn = 3, error = 4, critical = 5, off = 6
    # Every dump_* switch below takes 0: off, 1: on; the trailing comment names
    # the log that the switch controls.
    dump {
        file_print_level   = "2";   # print level for the dump log file
        screen_print_level = "3";   # print level for the screen
        flush_level        = "3";   # file flush level

        dump_scalar_switch = "1";   # scalar instruction dump switch
        dump_switch        = "1";   # all dump switch

        dump_instr_mask      = "1";   # instr_log
        dump_cube_status     = "1";   # cube_log
        dump_vector_status   = "1";   # vector_log
        dump_scalar_status   = "1";   # scalar_log
        dump_ccu_status      = "1";   # ccu_log
        dump_dmac_status     = "1";   # lsu_log
        dump_mte_rob_status  = "1";   # rob_log
        dump_mte_biu_stall   = "1";   # biu_stall_log
        dump_mte_biu_req     = "1";   # mte_biu_req_log
        dump_mte_biu_resp    = "1";   # mte_biu_resp_log
        dump_mte_rob_usage   = "1";   # mte_biu_rob_usage_log
        dump_unzip_status    = "1";   # unzip_log
        dump_fmd_status      = "1";   # fmd_log
        dump_fmc_status      = "1";   # fmc_log
        dump_mte_status      = "1";   # mte_status_log
        dump_buffer_wr_status = "1";  # buffer_log
        dump_buffer_rd_status = "1";  # rd_buffer_log
        dump_l0a_rd_status   = "1";   # l0a_rd_log
        dump_l0a_wr_status   = "1";   # l0a_wr_log
        dump_l0b_rd_status   = "1";   # l0b_rd_log
        dump_l0b_wr_status   = "1";   # l0b_wr_log
        dump_l0c_rd_status   = "1";   # l0c_rd_log
        dump_l0c_wr_status   = "1";   # l0c_wr_log
        dump_ub_rd_status    = "1";   # ub_rd_log
        dump_ub_wr_status    = "1";   # ub_wr_log
        dump_l1_rd_status    = "1";   # l1_rd_log
        dump_l1_wr_status    = "1";   # l1_wr_log
        dump_biu_status      = "1";   # biu_log
        dump_biu_result      = "1";   # biu_result_log
        dump_biu_in          = "1";   # biu_in_log
        dump_popped_instr_mask = "1"; # instr_popped_log
        dump_ub_status       = "1";   # ub_log
        dump_issque_status   = "1";   # issque_log
        dump_icache_status   = "1";   # icache_log
        dump_buffer_op_time  = "1";   # buffer_op_log
        dump_dcache_status   = "1";   # dcache_log
        dump_buffer_pingpong_status = "1";   # buffer_pingpong_conflict_log
        dump_reg_status      = "1";   # reg_log
    };

    # -------------------------------------------------------------------
    # CA Icache Specifications
    # -------------------------------------------------------------------
    icache {
        ic_addr_width = "48";
        ic_asso_num   = "4";
        ic_size       = "32 KB";   # 32*1024 bytes (the earlier "16*1024" remark did not match; 256 lines * 128 B = 32 KB)
        ic_line_size  = "128";     # 1024/8
        ic_entry_num  = "64";      # (ic_size/ic_line_size)/ic_asso_num
        ic_line_num   = "256";     # (ic_size/ic_line_size)
        ic_prefetch_num    = "3";
        ic_max_otsd_num    = "8";
        ic_max_preload_num = "7";
        invalid_cache_line_num = "0xffffffff";
        ic_idx_addr_lsb  = "7";
        ic_idx_addr_mask = "0x3f";
        ic_tag_addr_lsb  = "13";
        ic_tag_addr_mask = "0x7ffffffff";
    };

    # -------------------------------------------------------------------
    # CA Dcache Specifications
    # -------------------------------------------------------------------
    dcache {
        dc_cross_line_num = "2";
        dc_set_size  = "32";
        dc_way_size  = "2";
        dc_line_size = "64";
        dc_line_num  = "64";     # dc_line_num = dc_set_size * dc_way_size
        dc_size      = "4 KB";   # dc_size = dc_line_size * dc_way_size * dc_set_size;
        dc_max_read_otsd_num  = "4";
        dc_max_write_otsd_num = "4";
        dc_idx_addr_lsb  = "6";
        dc_idx_addr_mask = "0x1f";
        dc_tag_addr_lsb  = "11";
        dc_tag_addr_mask = "0x1fffffffff";
        dc_wreq_wb_ctrl  = "1";  # Ascend610 dcache-biu wreq port
    };
};