============================= test session starts ==============================
platform linux -- Python 3.7.5, pytest-5.4.3, py-1.11.0, pluggy-0.13.1
rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/graph_kernel/symbolic_shape, inifile: /home/jenkins/sault/virtual_test/virtualenv_002/sault/config/pytest.ini
plugins: mock-3.11.1, forked-1.6.0, xdist-1.32.0
collected 1 item

test_kernelpacket.py [WARNING] ME(3808856:281473395218304,MainProcess):2025-03-05-18:46:42.306.913 [mindspore/context.py:1999] For 'context.set_context', 'enable_graph_kernel' parameter is deprecated, and will be removed in the next version. Please use jit_config={'jit_level': 'O1'} instead.

TotalTime = 0.319833, [21]
[bootstrap]: 0.00340873
[type_inference]: 0.285665
[auto_monad]: 0.0001866
[graph_reusing]: 2.82e-06
[inline]: 1.92999e-06
[parallel-infer-symbol]: 1.75e-06
[pre_auto_parallel]: 0.000372721
[insert-virtual-dataset]: 2.99e-06
[parallel-infer-symbol-second]: 6.40008e-07
[dataset_repeat_opt]: 1.35e-06
[pipeline_split]: 1.13e-06
[optimize]: 0.0158707, [52]
    [py_interpret_to_execute]: 2.743e-05
    [rewriter_before_opt_a]: 0.00011431
    [opt_a]: 0.0135603, [2]
        [Cycle 1]: 0.00417684, [43]
            [expand_dump_flag]: 5.61e-06
            [switch_simplify]: 0.0001861
            [loop_unroll]: 3.563e-05
            [a_1]: 0.000869652
            [recompute_prepare]: 1.018e-05
            [updatestate_depend_eliminate]: 1.068e-05
            [updatestate_assign_eliminate]: 5.00999e-06
            [updatestate_loads_eliminate]: 4.99e-06
            [parameter_eliminate]: 3.74e-06
            [a_2]: 0.00012322
            [accelerated_algorithm]: 2.342e-05
            [shard]: 1.60999e-06
            [meta_shard_fg_expand]: 4.2e-06
            [shard_inline]: 9.22001e-06
            [auto_parallel]: 1.296e-05
            [parallel]: 1.257e-05
            [flash_sp]: 1.304e-05
            [merge_comm]: 7.82999e-06
            [allreduce_fusion]: 5.63001e-06
            [matmul_add_comm_reduction]: 9.91e-06
            [allreduce_slice_to_reducescatter]: 4.50003e-07
            [virtual_shard_identity]: 1.058e-05
            [virtual_dataset]: 8.91001e-06
            [get_grad_eliminate_]: 8.54999e-06
            [virtual_output]: 8.67e-06
            [merge_forward]: 6.25e-06
            [cell_reuse_recompute_pass]: 1.47001e-06
            [cell_reuse_handle_not_recompute_node_pass]: 1.7491e-05
            [before_grad]: 1.63e-05
            [inplace_validation]: 5.24e-06
            [meta_fg_expand]: 6.43e-06
            [inplace_validation_after_expand]: 6.75001e-06
            [flash_sp_send_recv_attached]: 2.16e-06
            [receive_attached]: 3.98999e-06
            [after_resolve]: 1.335e-05
            [a_after_grad]: 1.379e-05
            [special_op_eliminate]: 9.14e-06
            [renormalize]: 0.00218372
            [add_forward_monad_depend]: 4.19001e-06
            [auto_monad_grad]: 2.05e-06
            [auto_monad_eliminator]: 1.555e-05
            [cse]: 0.000122391
            [a_3]: 6.368e-05
        [Cycle 2]: 0.000870991, [43]
            [expand_dump_flag]: 1.37e-06
            [switch_simplify]: 9.75e-06
            [loop_unroll]: 8.26e-06
            [a_1]: 0.00022412
            [recompute_prepare]: 8.41e-06
            [updatestate_depend_eliminate]: 6.14999e-06
            [updatestate_assign_eliminate]: 4.48e-06
            [updatestate_loads_eliminate]: 4.26e-06
            [parameter_eliminate]: 1.54e-06
            [a_2]: 0.00011295
            [accelerated_algorithm]: 1.153e-05
            [shard]: 1.09e-06
            [meta_shard_fg_expand]: 2.76e-06
            [shard_inline]: 8.38999e-06
            [auto_parallel]: 1.2191e-05
            [parallel]: 4.22999e-06
            [flash_sp]: 2.98e-06
            [merge_comm]: 7.07e-06
            [allreduce_fusion]: 5.98001e-06
            [matmul_add_comm_reduction]: 8.84001e-06
            [allreduce_slice_to_reducescatter]: 5.59987e-07
            [virtual_shard_identity]: 9.12001e-06
            [virtual_dataset]: 8.21e-06
            [get_grad_eliminate_]: 8.62e-06
            [virtual_output]: 7.7e-06
            [merge_forward]: 5.42001e-06
            [cell_reuse_recompute_pass]: 2.55e-06
            [cell_reuse_handle_not_recompute_node_pass]: 1.634e-05
            [before_grad]: 1.387e-05
            [inplace_validation]: 4.73e-06
            [meta_fg_expand]: 4.81e-06
            [inplace_validation_after_expand]: 5.81e-06
            [flash_sp_send_recv_attached]: 9.5999e-07
            [receive_attached]: 9.89996e-07
            [after_resolve]: 1.135e-05
            [a_after_grad]: 1.384e-05
            [special_op_eliminate]: 8.48e-06
            [renormalize]: 1.09998e-07
            [add_forward_monad_depend]: 1.23e-06
            [auto_monad_grad]: 1.04999e-06
            [auto_monad_eliminator]: 9.36001e-06
            [cse]: 2.525e-05
            [a_3]: 5.244e-05
    [py_interpret_to_execute_after_opt_a]: 1.329e-05
    [slice_cell_reuse_recomputed_activation]: 1.83999e-06
    [rewriter_after_opt_a]: 0.000250161
    [convert_after_rewriter]: 1.092e-05
    [order_py_execute_after_rewriter]: 7.03e-06
    [opt_b]: 0.00028041, [1]
        [Cycle 1]: 0.00027455, [7]
            [b_1]: 0.00018802
            [b_2]: 1.036e-05
            [updatestate_depend_eliminate]: 5.30001e-06
            [updatestate_assign_eliminate]: 4.53999e-06
            [updatestate_loads_eliminate]: 4.28999e-06
            [renormalize]: 3.09999e-07
            [cse]: 2.571e-05
    [optimize_parallel_all_gather_comm]: 8.17e-06
    [overlap_param_gather]: 2.505e-05
    [cconv]: 1.761e-05
    [loop_unroll]: 0.000568651
    [opt_after_cconv]: 0.00013106, [1]
        [Cycle 1]: 0.00012514, [7]
            [c_1]: 4.22e-05
            [parameter_eliminate]: 2.22e-06
            [updatestate_depend_eliminate]: 6.90999e-06
            [updatestate_assign_eliminate]: 4.34999e-06
            [updatestate_loads_eliminate]: 4.33e-06
            [cse]: 3.019e-05
            [renormalize]: 3.09999e-07
    [remove_dup_value]: 2.771e-05
    [tuple_transform]: 9.3411e-05, [1]
        [Cycle 1]: 8.8491e-05, [2]
            [d_1]: 7.6991e-05
            [renormalize]: 2.80008e-07
    [partial_unused_args_eliminate]: 1.98999e-06
    [add_cache_embedding]: 1.227e-05
    [add_recomputation]: 7.388e-05
    [cse_after_recomputation]: 3.329e-05, [1]
        [Cycle 1]: 2.805e-05, [1]
            [cse]: 2.193e-05
    [environ_conv]: 1.732e-05
    [swap_dp_allreduce_reducescatter]: 7.76e-06
    [bias_add_comm_swap]: 1.71001e-06
    [label_micro_interleaved_index]: 1.38e-06
    [label_fine_grained_interleaved_index]: 1.45e-06
    [merge_cast_opt]: 8.89995e-07
    [slice_recompute_activation]: 1.23e-06
    [micro_interleaved_order_control]: 1.30001e-06
    [assign_add_opt]: 1.098e-05
    [ForceFp32Comm]: 7.59988e-07
    [remove_cast_before_assign_add]: 6.90008e-07
    [full_micro_interleaved_order_control]: 1.47001e-06
    [reorder_send_recv_between_fp_bp]: 1.18e-06
    [comm_op_add_attrs]: 7.49991e-07
    [add_comm_op_reuse_tag]: 6.50005e-07
    [interleave_split_concat_branches]: 6.10002e-07
    [interleave_parallel_branches]: 5.19998e-07
    [overlap_opt_shard_in_pipeline]: 1.328e-05
    [overlap_opt_shard_grad_in_pipeline]: 1.21999e-06
    [control_data_broadcast_order]: 8.59989e-07
    [grouped_pairwise_exchange_alltoall]: 7.89994e-07
    [offloading_packed_experts]: 9.00007e-07
    [overlap_recompute_and_grad_model_parallel]: 1.31999e-06
    [overlap_grad_matmul_and_grad_allreduce]: 6.09987e-07
    [overlap_recompute_allgather_and_fa_grad]: 7.10002e-07
    [overlap_grad_ring_attention]: 1.18e-06
    [overlap_grad_flash_sp]: 2.326e-05
    [begin_end_overlap_inline]: 5.29995e-07
    [split_matmul_comm_elemetwise]: 1.98001e-06
    [split_layernorm_comm]: 1.09e-06
    [handle_group_info]: 6.4999e-07
    [symbol_engine_optimizer]: 0.000235181, [1]
        [Cycle 1]: 0.000229591, [6]
            [build]: 0.00010859
            [elim_shapecalc]: 2.199e-05
            [elim_not_effective]: 3.1041e-05
            [opt_reshape]: 1.075e-05
            [fold_const_symbol]: 2.163e-05
            [renormalize]: 4.19997e-07
[pipeline_parallel_scheduler]: 1.42e-06
[auto_monad_reorder]: 2.515e-05
[get_jit_bprop_graph]: 3.69997e-07
[rewriter_after_jit_bprop_graph]: 2.06e-06
[eliminate_special_op_node]: 0.000516881
[distribtued_split]: 1.18e-06
[validate]: 6.159e-05
[task_emit]: 0.0133862
[execute]: 6.74e-06
Sums
    bootstrap                            :     0.003409s :  1.10%
    type_inference                       :     0.285665s : 92.13%
    auto_monad                           :     0.000187s :  0.06%
    graph_reusing                        :     0.000003s :  0.00%
    inline                               :     0.000002s :  0.00%
    parallel-infer-symbol                :     0.000002s :  0.00%
    pre_auto_parallel                    :     0.000373s :  0.12%
    insert-virtual-dataset               :     0.000003s :  0.00%
    parallel-infer-symbol-second         :     0.000001s :  0.00%
    dataset_repeat_opt                   :     0.000001s :  0.00%
    pipeline_split                       :     0.000001s :  0.00%
    optimize.py_interpret_to_execute     :     0.000027s :  0.01%
    optimize.rewriter_before_opt_a       :     0.000114s :  0.04%
    optimize.opt_a.expand_dump_flag      :     0.000007s :  0.00%
    optimize.opt_a.switch_simplify       :     0.000196s :  0.06%
    optimize.opt_a.loop_unroll           :     0.000044s :  0.01%
    optimize.opt_a.a_1                   :     0.001094s :  0.35%
    optimize.opt_a.recompute_prepare     :     0.000019s :  0.01%
    optimize.opt_a.updatestate_depend_eliminate :     0.000017s :  0.01%
    optimize.opt_a.updatestate_assign_eliminate :     0.000009s :  0.00%
    optimize.opt_a.updatestate_loads_eliminate :     0.000009s :  0.00%
    optimize.opt_a.parameter_eliminate   :     0.000005s :  0.00%
    optimize.opt_a.a_2                   :     0.000236s :  0.08%
    optimize.opt_a.accelerated_algorithm :     0.000035s :  0.01%
    optimize.opt_a.shard                 :     0.000003s :  0.00%
    optimize.opt_a.meta_shard_fg_expand  :     0.000007s :  0.00%
    optimize.opt_a.shard_inline          :     0.000018s :  0.01%
    optimize.opt_a.auto_parallel         :     0.000025s :  0.01%
    optimize.opt_a.parallel              :     0.000017s :  0.01%
    optimize.opt_a.flash_sp              :     0.000016s :  0.01%
    optimize.opt_a.merge_comm            :     0.000015s :  0.00%
    optimize.opt_a.allreduce_fusion      :     0.000012s :  0.00%
    optimize.opt_a.matmul_add_comm_reduction :     0.000019s :  0.01%
    optimize.opt_a.allreduce_slice_to_reducescatter :     0.000001s :  0.00%
    optimize.opt_a.virtual_shard_identity :     0.000020s :  0.01%
    optimize.opt_a.virtual_dataset       :     0.000017s :  0.01%
    optimize.opt_a.get_grad_eliminate_   :     0.000017s :  0.01%
    optimize.opt_a.virtual_output        :     0.000016s :  0.01%
    optimize.opt_a.merge_forward         :     0.000012s :  0.00%
    optimize.opt_a.cell_reuse_recompute_pass :     0.000004s :  0.00%
    optimize.opt_a.cell_reuse_handle_not_recompute_node_pass :     0.000034s :  0.01%
    optimize.opt_a.before_grad           :     0.000030s :  0.01%
    optimize.opt_a.inplace_validation    :     0.000010s :  0.00%
    optimize.opt_a.meta_fg_expand        :     0.000011s :  0.00%
    optimize.opt_a.inplace_validation_after_expand :     0.000013s :  0.00%
    optimize.opt_a.flash_sp_send_recv_attached :     0.000003s :  0.00%
    optimize.opt_a.receive_attached      :     0.000005s :  0.00%
    optimize.opt_a.after_resolve         :     0.000025s :  0.01%
    optimize.opt_a.a_after_grad          :     0.000028s :  0.01%
    optimize.opt_a.special_op_eliminate  :     0.000018s :  0.01%
    optimize.opt_a.renormalize           :     0.002184s :  0.70%
    optimize.opt_a.add_forward_monad_depend :     0.000005s :  0.00%
    optimize.opt_a.auto_monad_grad       :     0.000003s :  0.00%
    optimize.opt_a.auto_monad_eliminator :     0.000025s :  0.01%
    optimize.opt_a.cse                   :     0.000148s :  0.05%
    optimize.opt_a.a_3                   :     0.000116s :  0.04%
    optimize.py_interpret_to_execute_after_opt_a :     0.000013s :  0.00%
    optimize.slice_cell_reuse_recomputed_activation :     0.000002s :  0.00%
    optimize.rewriter_after_opt_a        :     0.000250s :  0.08%
    optimize.convert_after_rewriter      :     0.000011s :  0.00%
    optimize.order_py_execute_after_rewriter :     0.000007s :  0.00%
    optimize.opt_b.b_1                   :     0.000188s :  0.06%
    optimize.opt_b.b_2                   :     0.000010s :  0.00%
    optimize.opt_b.updatestate_depend_eliminate :     0.000005s :  0.00%
    optimize.opt_b.updatestate_assign_eliminate :     0.000005s :  0.00%
    optimize.opt_b.updatestate_loads_eliminate :     0.000004s :  0.00%
    optimize.opt_b.renormalize           :     0.000000s :  0.00%
    optimize.opt_b.cse                   :     0.000026s :  0.01%
    optimize.optimize_parallel_all_gather_comm :     0.000008s :  0.00%
    optimize.overlap_param_gather        :     0.000025s :  0.01%
    optimize.cconv                       :     0.000018s :  0.01%
    optimize.loop_unroll                 :     0.000569s :  0.18%
    optimize.opt_after_cconv.c_1         :     0.000042s :  0.01%
    optimize.opt_after_cconv.parameter_eliminate :     0.000002s :  0.00%
    optimize.opt_after_cconv.updatestate_depend_eliminate :     0.000007s :  0.00%
    optimize.opt_after_cconv.updatestate_assign_eliminate :     0.000004s :  0.00%
    optimize.opt_after_cconv.updatestate_loads_eliminate :     0.000004s :  0.00%
    optimize.opt_after_cconv.cse         :     0.000030s :  0.01%
    optimize.opt_after_cconv.renormalize :     0.000000s :  0.00%
    optimize.remove_dup_value            :     0.000028s :  0.01%
    optimize.tuple_transform.d_1         :     0.000077s :  0.02%
    optimize.tuple_transform.renormalize :     0.000000s :  0.00%
    optimize.partial_unused_args_eliminate :     0.000002s :  0.00%
    optimize.add_cache_embedding         :     0.000012s :  0.00%
    optimize.add_recomputation           :     0.000074s :  0.02%
    optimize.cse_after_recomputation.cse :     0.000022s :  0.01%
    optimize.environ_conv                :     0.000017s :  0.01%
    optimize.swap_dp_allreduce_reducescatter :     0.000008s :  0.00%
    optimize.bias_add_comm_swap          :     0.000002s :  0.00%
    optimize.label_micro_interleaved_index :     0.000001s :  0.00%
    optimize.label_fine_grained_interleaved_index :     0.000001s :  0.00%
    optimize.merge_cast_opt              :     0.000001s :  0.00%
    optimize.slice_recompute_activation  :     0.000001s :  0.00%
    optimize.micro_interleaved_order_control :     0.000001s :  0.00%
    optimize.assign_add_opt              :     0.000011s :  0.00%
    optimize.ForceFp32Comm               :     0.000001s :  0.00%
    optimize.remove_cast_before_assign_add :     0.000001s :  0.00%
    optimize.full_micro_interleaved_order_control :     0.000001s :  0.00%
    optimize.reorder_send_recv_between_fp_bp :     0.000001s :  0.00%
    optimize.comm_op_add_attrs           :     0.000001s :  0.00%
    optimize.add_comm_op_reuse_tag       :     0.000001s :  0.00%
    optimize.interleave_split_concat_branches :     0.000001s :  0.00%
    optimize.interleave_parallel_branches :     0.000001s :  0.00%
    optimize.overlap_opt_shard_in_pipeline :     0.000013s :  0.00%
    optimize.overlap_opt_shard_grad_in_pipeline :     0.000001s :  0.00%
    optimize.control_data_broadcast_order :     0.000001s :  0.00%
    optimize.grouped_pairwise_exchange_alltoall :     0.000001s :  0.00%
    optimize.offloading_packed_experts   :     0.000001s :  0.00%
    optimize.overlap_recompute_and_grad_model_parallel :     0.000001s :  0.00%
    optimize.overlap_grad_matmul_and_grad_allreduce :     0.000001s :  0.00%
    optimize.overlap_recompute_allgather_and_fa_grad :     0.000001s :  0.00%
    optimize.overlap_grad_ring_attention :     0.000001s :  0.00%
    optimize.overlap_grad_flash_sp       :     0.000023s :  0.01%
    optimize.begin_end_overlap_inline    :     0.000001s :  0.00%
    optimize.split_matmul_comm_elemetwise :     0.000002s :  0.00%
    optimize.split_layernorm_comm        :     0.000001s :  0.00%
    optimize.handle_group_info           :     0.000001s :  0.00%
    optimize.symbol_engine_optimizer.build :     0.000109s :  0.04%
    optimize.symbol_engine_optimizer.elim_shapecalc :     0.000022s :  0.01%
    optimize.symbol_engine_optimizer.elim_not_effective :     0.000031s :  0.01%
    optimize.symbol_engine_optimizer.opt_reshape :     0.000011s :  0.00%
    optimize.symbol_engine_optimizer.fold_const_symbol :     0.000022s :  0.01%
    optimize.symbol_engine_optimizer.renormalize :     0.000000s :  0.00%
    pipeline_parallel_scheduler          :     0.000001s :  0.00%
    auto_monad_reorder                   :     0.000025s :  0.01%
    get_jit_bprop_graph                  :     0.000000s :  0.00%
    rewriter_after_jit_bprop_graph       :     0.000002s :  0.00%
    eliminate_special_op_node            :     0.000517s :  0.17%
    distribtued_split                    :     0.000001s :  0.00%
    validate                             :     0.000062s :  0.02%
    task_emit                            :     0.013386s :  4.32%
    execute                              :     0.000007s :  0.00%


Time group info:
------[substitution.]   0.000370    90
 4.40% :     0.000016s :      5: substitution.elim_not_effective
 1.06% :     0.000004s :      1: substitution.elim_shapecalc_of_broadcastargs
 1.36% :     0.000005s :      3: substitution.float_tuple_getitem_switch
 2.37% :     0.000009s :      5: substitution.fold_const_symbol
 1.86% :     0.000007s :      6: substitution.graph_param_transform
48.32% :     0.000179s :      9: substitution.inline
 1.69% :     0.000006s :     10: substitution.j_node_and_user_rematch
 3.26% :     0.000012s :      2: substitution.less_batch_normalization
 1.77% :     0.000007s :      2: substitution.minmaximum_grad
 4.77% :     0.000018s :      9: substitution.reduce_eliminate
 2.07% :     0.000008s :     10: substitution.remove_not_recompute_node
 0.82% :     0.000003s :      2: substitution.replace_old_param
 6.62% :     0.000024s :      4: substitution.switch_simplify
 3.83% :     0.000014s :      4: substitution.tuple_list_convert_item_index_to_positive
 4.46% :     0.000016s :      4: substitution.tuple_list_get_item_const_eliminator
 2.86% :     0.000011s :      4: substitution.tuple_list_get_item_depend_reorder
 5.76% :     0.000021s :      6: substitution.tuple_list_get_item_eliminator
 2.71% :     0.000010s :      4: substitution.tuple_list_get_set_item_eliminator
------[type_inference.]   0.285597     2
99.08% :     0.282981s :      1: type_inference.infer
 0.92% :     0.002616s :      1: type_inference.specialize
------[replace.]   0.000122    13
46.80% :     0.000057s :      9: replace.inline
53.20% :     0.000065s :      4: replace.switch_simplify
------[match.]   0.000195    13
88.85% :     0.000174s :      9: match.inline
11.15% :     0.000022s :      4: match.switch_simplify
------[predicate.]   0.000331  1913
 0.97% :     0.000003s :     21: predicate.accumulaten_eliminater
 0.80% :     0.000003s :      6: predicate.ad_related_special_op_eliminate
 0.52% :     0.000002s :     12: predicate.addn_check_dump
 1.05% :     0.000003s :     21: predicate.addn_zero_filter
 0.91% :     0.000003s :     21: predicate.adjust_all_reduce_mul_add
 2.01% :     0.000007s :     33: predicate.arithmetic_simplify
 0.96% :     0.000003s :     21: predicate.cast_eliminate
 0.60% :     0.000002s :     12: predicate.check_bprop_eliminate
 0.51% :     0.000002s :     12: predicate.compare_switch_simplify
 0.16% :     0.000001s :      6: predicate.const_output_eliminate
 0.35% :     0.000001s :      6: predicate.convert_tensor_all_eliminate
 1.48% :     0.000005s :     21: predicate.convert_tensor_eliminate
 0.54% :     0.000002s :     12: predicate.depend_value_elim
 1.00% :     0.000003s :     21: predicate.dict_get_item_const_eliminator
 1.14% :     0.000004s :     21: predicate.dict_get_item_eliminator
 0.98% :     0.000003s :     21: predicate.dict_set_item_eliminator
 0.23% :     0.000001s :      6: predicate.elim_not_effective
 0.52% :     0.000002s :      6: predicate.elim_shapecalc_of_broadcastargs
 1.18% :     0.000004s :     27: predicate.environ_add_const_eliminate
 1.15% :     0.000004s :     27: predicate.environ_get_add_eliminate
 1.14% :     0.000004s :     27: predicate.environ_get_depend_swap
 1.84% :     0.000006s :     39: predicate.environ_get_eliminate
 1.12% :     0.000004s :     27: predicate.environ_get_set_eliminate
 1.29% :     0.000004s :     30: predicate.exchange_switch_depend_value
 1.75% :     0.000006s :     30: predicate.float_depend_g_call
 0.55% :     0.000002s :     12: predicate.float_environ_get_switch
 0.88% :     0.000003s :     18: predicate.float_tuple_getitem_switch
 0.17% :     0.000001s :      6: predicate.fold_const_symbol
 0.67% :     0.000002s :     12: predicate.get_grad_eliminate
 0.22% :     0.000001s :      6: predicate.graph_param_transform
 0.55% :     0.000002s :     12: predicate.incorporate_call
 0.47% :     0.000002s :     12: predicate.incorporate_call_switch
 5.90% :     0.000020s :     87: predicate.inline
 0.81% :     0.000003s :     12: predicate.inline_without_move
 0.29% :     0.000001s :     12: predicate.j_node_and_user_rematch
 1.01% :     0.000003s :     13: predicate.less_batch_normalization
 1.63% :     0.000005s :     33: predicate.list_to_tuple_eliminator_
 2.45% :     0.000008s :     54: predicate.load_eliminater
 0.90% :     0.000003s :      6: predicate.loop_unroll_after_grad
 2.11% :     0.000007s :     43: predicate.loop_unroll_before_grad
 1.74% :     0.000006s :     33: predicate.make_slice_get_slice_eliminator
 0.61% :     0.000002s :     12: predicate.merge_addn
 0.62% :     0.000002s :     12: predicate.micro_step_allgather_replace
 0.51% :     0.000002s :     12: predicate.mini_step_allgather_replace
 0.95% :     0.000003s :     21: predicate.minmaximum_grad
 0.51% :     0.000002s :      6: predicate.mutable_eliminate
 0.37% :     0.000001s :      6: predicate.opt_reshape
 0.33% :     0.000001s :      6: predicate.parallel_virtual_node
 1.92% :     0.000006s :     30: predicate.partial_defer_inline
 1.29% :     0.000004s :     27: predicate.partial_eliminate
 0.98% :     0.000003s :     21: predicate.print_const_string_wrapper
 0.57% :     0.000002s :     12: predicate.reduce_all_const_elim
 1.98% :     0.000007s :     21: predicate.reduce_eliminate
 2.38% :     0.000008s :     54: predicate.redundant_stop_gradient_eliminater
 0.41% :     0.000001s :     12: predicate.remove_not_recompute_node
 1.06% :     0.000003s :     33: predicate.replace_applicator
 0.39% :     0.000001s :     12: predicate.replace_old_param
 0.17% :     0.000001s :      6: predicate.reset_defer_inline
 0.97% :     0.000003s :     21: predicate.reshape_eliminate
 0.67% :     0.000002s :     12: predicate.row_tensor_add_zeros_like
 0.41% :     0.000001s :      6: predicate.row_tensor_eliminate
 0.87% :     0.000003s :     12: predicate.same_eliminate
 0.36% :     0.000001s :     12: predicate.set_cell_output_no_recompute
 0.78% :     0.000003s :     12: predicate.shard_identity_eliminate
 1.12% :     0.000004s :     18: predicate.special_op_eliminate
 0.73% :     0.000002s :     12: predicate.specialize_transform
 0.78% :     0.000003s :     12: predicate.split_environ_get_set_with_tuple_value
 0.79% :     0.000003s :     12: predicate.stack_unstack_eliminate
 0.29% :     0.000001s :      6: predicate.switch_call_monad_eliminater
 1.41% :     0.000005s :     30: predicate.switch_defer_inline
 2.02% :     0.000007s :     42: predicate.switch_layer_defer_inline
 6.95% :     0.000023s :     93: predicate.switch_simplify
 1.04% :     0.000003s :     21: predicate.tile_eliminate
 1.19% :     0.000004s :     21: predicate.transpose_eliminate
 1.67% :     0.000006s :     33: predicate.tuple_list_convert_item_index_to_positive
 1.63% :     0.000005s :     33: predicate.tuple_list_get_item_const_eliminator
 1.43% :     0.000005s :     33: predicate.tuple_list_get_item_depend_reorder
 2.53% :     0.000008s :     45: predicate.tuple_list_get_item_eliminator
 1.52% :     0.000005s :     33: predicate.tuple_list_get_set_item_eliminator
 2.26% :     0.000007s :     45: predicate.tuple_list_set_item_eliminator
 1.54% :     0.000005s :     33: predicate.tuple_to_list_eliminator_
 2.37% :     0.000008s :     54: predicate.updatestate_pure_node_eliminater
 3.05% :     0.000010s :     66: predicate.updatestate_useless_node_eliminater
 0.38% :     0.000001s :      6: predicate.value_based_eliminate
 0.65% :     0.000002s :     12: predicate.virtual_dataset_eliminate
 0.62% :     0.000002s :     12: predicate.virtual_output_eliminate
 0.39% :     0.000001s :      6: predicate.zero_like_fill_zero
------[func_graph_cloner_run.]   0.001722    21
59.04% :     0.001017s :     10: func_graph_cloner_run.FuncGraphClonerGraph
40.96% :     0.000705s :     11: func_graph_cloner_run.FuncGraphSpecializer
------[meta_graph.]   0.000000     0
------[manager.]   0.000000     0
------[pynative]   0.000000     0
------[others.]   0.340067   192
 0.00% :     0.000004s :      1: ForceFp32Comm
 0.00% :     0.000016s :      1: add_cache_embedding
 0.00% :     0.000003s :      1: add_comm_op_reuse_tag
 0.02% :     0.000079s :      1: add_recomputation
 0.00% :     0.000014s :      1: assign_add_opt
 0.06% :     0.000200s :      1: auto_monad
 0.01% :     0.000032s :      1: auto_monad_reorder
 0.00% :     0.000004s :      1: begin_end_overlap_inline
 0.00% :     0.000005s :      1: bias_add_comm_swap
 1.02% :     0.003461s :      1: bootstrap
 0.01% :     0.000022s :      1: cconv
 0.00% :     0.000004s :      1: comm_op_add_attrs
 0.00% :     0.000004s :      1: control_data_broadcast_order
 0.00% :     0.000016s :      1: convert_after_rewriter
 0.01% :     0.000037s :      1: cse_after_recomputation
 0.00% :     0.000005s :      1: dataset_repeat_opt
 0.00% :     0.000007s :      1: distribtued_split
 0.16% :     0.000530s :      1: eliminate_special_op_node
 0.01% :     0.000022s :      1: environ_conv
 0.00% :     0.000014s :      1: execute
 0.00% :     0.000005s :      1: full_micro_interleaved_order_control
 0.00% :     0.000005s :      1: get_jit_bprop_graph
 0.00% :     0.000009s :      1: graph_reusing
 0.00% :     0.000004s :      1: grouped_pairwise_exchange_alltoall
 0.00% :     0.000004s :      1: handle_group_info
 0.00% :     0.000007s :      1: inline
 0.00% :     0.000009s :      1: insert-virtual-dataset
 0.00% :     0.000003s :      1: interleave_parallel_branches
 0.00% :     0.000004s :      1: interleave_split_concat_branches
 0.00% :     0.000005s :      1: label_fine_grained_interleaved_index
 0.00% :     0.000004s :      1: label_micro_interleaved_index
 0.17% :     0.000577s :      1: loop_unroll
 0.00% :     0.000004s :      1: merge_cast_opt
 0.00% :     0.000004s :      1: micro_interleaved_order_control
 0.00% :     0.000004s :      1: offloading_packed_experts
 0.00% :     0.000015s :      1: opt.transform.loop_unroll_optimizer
 0.55% :     0.001877s :     80: opt.transform.opt_a
 0.01% :     0.000041s :      1: opt.transform.opt_after_cconv
 0.05% :     0.000176s :     27: opt.transform.opt_b
 0.02% :     0.000075s :      1: opt.transform.opt_trans_graph
 0.01% :     0.000033s :      3: opt.transform.special_op_eliminate
 0.02% :     0.000081s :      4: opt.transform.symbol_engine_opt
 3.99% :     0.013565s :      1: opt_a
 0.04% :     0.000135s :      1: opt_after_cconv
 0.08% :     0.000284s :      1: opt_b
 4.67% :     0.015879s :      1: optimize
 0.00% :     0.000012s :      1: optimize_parallel_all_gather_comm
 0.00% :     0.000011s :      1: order_py_execute_after_rewriter
 0.01% :     0.000028s :      1: overlap_grad_flash_sp
 0.00% :     0.000004s :      1: overlap_grad_matmul_and_grad_allreduce
 0.00% :     0.000004s :      1: overlap_grad_ring_attention
 0.00% :     0.000004s :      1: overlap_opt_shard_grad_in_pipeline
 0.01% :     0.000017s :      1: overlap_opt_shard_in_pipeline
 0.01% :     0.000029s :      1: overlap_param_gather
 0.00% :     0.000004s :      1: overlap_recompute_allgather_and_fa_grad
 0.00% :     0.000004s :      1: overlap_recompute_and_grad_model_parallel
 0.00% :     0.000006s :      1: parallel-infer-symbol
 0.00% :     0.000005s :      1: parallel-infer-symbol-second
 0.00% :     0.000005s :      1: partial_unused_args_eliminate
 0.00% :     0.000006s :      1: pipeline_parallel_scheduler
 0.00% :     0.000005s :      1: pipeline_split
 0.11% :     0.000388s :      1: pre_auto_parallel
 0.01% :     0.000034s :      1: py_interpret_to_execute
 0.01% :     0.000018s :      1: py_interpret_to_execute_after_opt_a
 0.00% :     0.000004s :      1: remove_cast_before_assign_add
 0.01% :     0.000033s :      1: remove_dup_value
 0.41% :     0.001392s :      1: renormalize.infer
 0.23% :     0.000786s :      1: renormalize.specialize
 0.00% :     0.000004s :      1: reorder_send_recv_between_fp_bp
 0.00% :     0.000006s :      1: rewriter_after_jit_bprop_graph
 0.08% :     0.000257s :      1: rewriter_after_opt_a
 0.04% :     0.000120s :      1: rewriter_before_opt_a
 0.00% :     0.000005s :      1: slice_cell_reuse_recomputed_activation
 0.00% :     0.000004s :      1: slice_recompute_activation
 0.00% :     0.000004s :      1: split_layernorm_comm
 0.00% :     0.000005s :      1: split_matmul_comm_elemetwise
 0.00% :     0.000011s :      1: swap_dp_allreduce_reducescatter
 0.07% :     0.000239s :      1: symbol_engine_optimizer
 3.94% :     0.013405s :      1: task_emit
 0.03% :     0.000097s :      1: tuple_transform
84.01% :     0.285695s :      1: type_inference
 0.04% :     0.000129s :      1: validate

[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
Process ForkServerProcess-10:
Traceback (most recent call last):
  File "/usr/local/python/python375/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/local/python/python375/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/common/repository_manager/utils/common.py", line 110, in daemon_process
    os.kill(mgr_pid, signal.SIGKILL)
ProcessLookupError: [Errno 3] No such process
/usr/local/python/python375/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 30 leaked semaphores to clean up at shutdown
  len(cache))