==================================================Ascend ============================= test session starts ============================== platform linux -- Python 3.9.19, pytest-6.2.5, py-1.11.0, pluggy-1.5.0 rootdir: /home/jenkins/mindspore/testcases/testcases/tests/st/infer/ops/test_internal_ops, configfile: ../../../../../../../../sault/virtual_test/virtualenv_005/sault/config/pytest.ini plugins: mock-3.14.0, hydra-core-1.3.2, forked-1.6.0, anyio-4.9.0, xdist-1.32.0 collected 8 items test_swiglu_v2.py [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:37:20.399.03 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 0.605831, [21] [bootstrap]: 0.0007043 [type_inference]: 0.562245 [event_method]: 2.568e-05 [auto_monad]: 0.00102828 [graph_reusing]: 9.04e-06 [inline]: 4.2e-06 [add_attr]: 0.0284204, [1] [add_attr_with_inline]: 0.0284024, [1] [Cycle 1]: 0.00011966, [2] [tag_attr]: 2.781e-05 [meta_addattr_fg_expand]: 6.68998e-06 [parallel-infer-symbol]: 4.37e-06 [pre_auto_parallel]: 5.931e-05 [insert-virtual-dataset]: 3.03e-06 [parallel-infer-symbol-second]: 1.10999e-06 [dataset_repeat_opt]: 2.56e-06 [pipeline_split]: 2.04999e-06 [optimize]: 0.0122697, [53] [py_interpret_to_execute]: 3.275e-05 [rewriter_before_opt_a]: 0.00010855 [opt_a]: 0.00857307, [2] [Cycle 1]: 0.00714087, [45] [expand_dump_flag]: 3.97e-06 [switch_simplify]: 4.595e-05 [loop_unroll]: 3.025e-05 [a_1]: 0.00087117 [with_stream_mark]: 2.365e-05 [recompute_prepare]: 1.816e-05 [updatestate_depend_eliminate]: 8.16002e-06 [updatestate_assign_eliminate]: 6.67002e-06 [updatestate_loads_eliminate]: 5.66e-06 [parameter_eliminate]: 1.91e-06 [a_2]: 0.0002099 [accelerated_algorithm]: 1.502e-05 [shard]: 2.29001e-06 [meta_shard_fg_expand]: 3.99002e-06 [shard_inline]: 1.327e-05 [merge_send_recv]: 2.392e-05 [auto_parallel]: 1.551e-05 [parallel]: 5.376e-05 [flash_sp]: 2.33e-05 [merge_comm]: 7.95e-06 [allreduce_fusion]: 6.98998e-06 [matmul_add_comm_reduction]: 1.551e-05 [allreduce_slice_to_reducescatter]: 1.19e-06 [virtual_shard_identity]: 1.925e-05 [virtual_dataset]: 1.232e-05 [get_grad_eliminate_]: 1.25e-05 [virtual_output]: 1.159e-05 [merge_forward]: 9.09998e-06 [cell_reuse_recompute_pass]: 2.36998e-06 [offload_activation]: 1.632e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.673e-05 [merge_recompute_call_nodes]: 2.16998e-06 [before_grad]: 2.245e-05 [set_forward_comm_id_for_comm_node_pass]: 7.83001e-06 [meta_fg_expand]: 6.00002e-06 [flash_sp_send_recv_attached]: 3.28998e-06 [receive_attached]: 1.072e-05 [after_resolve]: 1.795e-05 [a_after_grad]: 1.99e-05 [renormalize]: 0.00492231 [add_forward_monad_depend]: 8.90001e-06 [auto_monad_grad]: 2.78003e-06 [auto_monad_eliminator]: 3.277e-05 [cse]: 0.000133 [a_3]: 0.00010838 [Cycle 2]: 0.00141742, [45] [expand_dump_flag]: 3.63e-06 [switch_simplify]: 1.63e-05 [loop_unroll]: 1.287e-05 [a_1]: 0.0003664 [with_stream_mark]: 2.545e-05 [recompute_prepare]: 1.419e-05 [updatestate_depend_eliminate]: 8.85999e-06 [updatestate_assign_eliminate]: 6.53e-06 [updatestate_loads_eliminate]: 5.70001e-06 [parameter_eliminate]: 2.43998e-06 [a_2]: 0.0001804 [accelerated_algorithm]: 1.331e-05 [shard]: 2.76e-06 [meta_shard_fg_expand]: 3.6e-06 [shard_inline]: 1.279e-05 [merge_send_recv]: 1.525e-05 [auto_parallel]: 1.403e-05 [parallel]: 1.124e-05 [flash_sp]: 4.58001e-06 [merge_comm]: 7.44002e-06 [allreduce_fusion]: 6.59001e-06 [matmul_add_comm_reduction]: 1.49e-05 [allreduce_slice_to_reducescatter]: 9.50007e-07 [virtual_shard_identity]: 1.65e-05 [virtual_dataset]: 1.182e-05 [get_grad_eliminate_]: 1.22e-05 [virtual_output]: 1.176e-05 [merge_forward]: 8.26002e-06 [cell_reuse_recompute_pass]: 3.55998e-06 [offload_activation]: 1.565e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.528e-05 [merge_recompute_call_nodes]: 1.55001e-06 [before_grad]: 2.151e-05 [set_forward_comm_id_for_comm_node_pass]: 7.38e-06 [meta_fg_expand]: 5.69e-06 [flash_sp_send_recv_attached]: 2.59001e-06 [receive_attached]: 2.86e-06 [after_resolve]: 1.825e-05 [a_after_grad]: 1.855e-05 [renormalize]: 6.99947e-08 [add_forward_monad_depend]: 2.88e-06 [auto_monad_grad]: 3.01001e-06 [auto_monad_eliminator]: 2.212e-05 [cse]: 0.00010967 [a_3]: 8.365e-05 [py_interpret_to_execute_after_opt_a]: 2.554e-05 [slice_cell_reuse_recomputed_activation]: 2.57001e-06 [rewriter_after_opt_a]: 0.0003139 [convert_after_rewriter]: 3.15e-05 [order_py_execute_after_rewriter]: 8.55999e-06 [mutable_eliminate]: 0.00076916 [opt_b]: 0.00049369, [1] [Cycle 1]: 0.00048502, [7] [b_1]: 0.00032096 [b_2]: 1.599e-05 [updatestate_depend_eliminate]: 1.387e-05 [updatestate_assign_eliminate]: 6.11e-06 [updatestate_loads_eliminate]: 5.86998e-06 [renormalize]: 7.50006e-07 [cse]: 7.354e-05 [optimize_parallel_all_gather_comm]: 3.286e-05 [overlap_param_gather]: 2.04999e-06 [cconv]: 4.184e-05 [loop_unroll]: 0.00060633 [opt_after_cconv]: 0.00019624, [1] [Cycle 1]: 0.0001883, [7] [c_1]: 6.078e-05 [parameter_eliminate]: 5.84e-06 [updatestate_depend_eliminate]: 1.134e-05 [updatestate_assign_eliminate]: 5.77001e-06 [updatestate_loads_eliminate]: 5.02999e-06 [cse]: 4.967e-05 [renormalize]: 6.90023e-07 [remove_dup_value]: 6.661e-05 [tuple_transform]: 0.00018223, [1] [Cycle 1]: 0.00017622, [4] [d_1]: 0.00013502 [none_parameter_eliminate]: 2.59999e-06 [renormalize]: 3.10014e-07 [switch_simplify]: 1.333e-05 [partial_unused_args_eliminate]: 2.36e-06 [add_recomputation]: 0.00010304 [cse_after_recomputation]: 4.558e-05, [1] [Cycle 1]: 4.009e-05, [1] [cse]: 3.324e-05 [environ_conv]: 2.757e-05 [swap_dp_allreduce_reducescatter]: 1.119e-05 [bias_add_comm_swap]: 3.11999e-06 [label_micro_interleaved_index]: 7.41999e-06 [label_fine_grained_interleaved_index]: 3.06001e-06 [merge_cast_opt]: 1.59e-06 [slice_recompute_activation]: 2.66999e-06 [micro_interleaved_order_control]: 2.53e-06 [assign_add_opt]: 1.29e-06 [ForceFp32Comm]: 9.79984e-07 [remove_cast_before_assign_add]: 1.15999e-06 [full_micro_interleaved_order_control]: 2.27001e-06 [reorder_send_recv_between_fp_bp]: 3.33998e-06 [comm_op_add_attrs]: 1.19e-06 [add_comm_op_reuse_tag]: 1.05999e-06 [interleave_split_concat_branches]: 1.42e-06 [interleave_parallel_branches]: 1.16997e-06 [overlap_opt_shard_in_pipeline]: 2.236e-05 [overlap_opt_shard_grad_in_pipeline]: 2.39001e-06 [control_data_broadcast_order]: 2.531e-05 [grouped_pairwise_exchange_alltoall]: 2.67001e-06 [offloading_packed_experts]: 7.06001e-06 [overlap_recompute_and_grad_model_parallel]: 6.83e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.64e-06 [overlap_recompute_allgather_and_fa_grad]: 1.64e-06 [overlap_recompute_comm]: 3.08e-06 [overlap_grad_ring_attention]: 7.06001e-06 [overlap_grad_flash_sp]: 3.631e-05 [begin_end_overlap_inline]: 5.89993e-07 [split_matmul_comm_elemetwise]: 2.31e-06 [split_layernorm_comm]: 1.86998e-06 [handle_group_info]: 9.89996e-07 [symbol_engine_optimizer]: 0.00015506, [1] [Cycle 1]: 0.00014967, [6] [build]: 3.126e-05 [elim_shapecalc]: 2.371e-05 [elim_not_effective]: 2.564e-05 [opt_reshape]: 1.328e-05 [fold_const_symbol]: 2.091e-05 [renormalize]: 3.30008e-07 [detach_backward]: 2.87002e-06 [pipeline_parallel_scheduler]: 1.55001e-06 [auto_monad_reorder]: 3.549e-05 [get_jit_bprop_graph]: 2.32999e-06 [rewriter_after_jit_bprop_graph]: 5.92999e-06 [opt_after_jit_grad]: 0.0006635 [validate]: 9.196e-05 Sums bootstrap : 0.000704s : 0.12% type_inference : 0.562245s : 97.59% event_method : 0.000026s : 0.00% auto_monad : 0.001028s : 0.18% graph_reusing : 0.000009s : 0.00% inline : 0.000004s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000028s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000007s : 0.00% parallel-infer-symbol : 0.000004s : 0.00% pre_auto_parallel : 0.000059s : 0.01% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000003s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000033s : 0.01% optimize.rewriter_before_opt_a : 0.000109s : 0.02% optimize.opt_a.expand_dump_flag : 0.000008s : 0.00% optimize.opt_a.switch_simplify : 0.000062s : 0.01% optimize.opt_a.loop_unroll : 0.000043s : 0.01% optimize.opt_a.a_1 : 0.001238s : 0.21% optimize.opt_a.with_stream_mark : 0.000049s : 0.01% optimize.opt_a.recompute_prepare : 0.000032s : 0.01% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.00% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000390s : 0.07% optimize.opt_a.accelerated_algorithm : 0.000028s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.00% optimize.opt_a.shard_inline : 0.000026s : 0.00% optimize.opt_a.merge_send_recv : 0.000039s : 0.01% optimize.opt_a.auto_parallel : 0.000030s : 0.01% optimize.opt_a.parallel : 0.000065s : 0.01% optimize.opt_a.flash_sp : 0.000028s : 0.00% optimize.opt_a.merge_comm : 0.000015s : 0.00% optimize.opt_a.allreduce_fusion : 0.000014s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000030s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000036s : 0.01% optimize.opt_a.virtual_dataset : 0.000024s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000025s : 0.00% optimize.opt_a.virtual_output : 0.000023s : 0.00% optimize.opt_a.merge_forward : 0.000017s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000006s : 0.00% optimize.opt_a.offload_activation : 0.000032s : 0.01% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000052s : 0.01% optimize.opt_a.merge_recompute_call_nodes : 0.000004s : 0.00% optimize.opt_a.before_grad : 0.000044s : 0.01% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000015s : 0.00% optimize.opt_a.meta_fg_expand : 0.000012s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.00% optimize.opt_a.receive_attached : 0.000014s : 0.00% optimize.opt_a.after_resolve : 0.000036s : 0.01% optimize.opt_a.a_after_grad : 0.000038s : 0.01% optimize.opt_a.renormalize : 0.004922s : 0.85% optimize.opt_a.add_forward_monad_depend : 0.000012s : 0.00% optimize.opt_a.auto_monad_grad : 0.000006s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000055s : 0.01% optimize.opt_a.cse : 0.000243s : 0.04% optimize.opt_a.a_3 : 0.000192s : 0.03% optimize.py_interpret_to_execute_after_opt_a : 0.000026s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000314s : 0.05% optimize.convert_after_rewriter : 0.000031s : 0.01% optimize.order_py_execute_after_rewriter : 0.000009s : 0.00% optimize.mutable_eliminate : 0.000769s : 0.13% optimize.opt_b.b_1 : 0.000321s : 0.06% optimize.opt_b.b_2 : 0.000016s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000014s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000074s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000033s : 0.01% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000042s : 0.01% optimize.loop_unroll : 0.000606s : 0.11% optimize.opt_after_cconv.c_1 : 0.000061s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000011s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000050s : 0.01% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000067s : 0.01% optimize.tuple_transform.d_1 : 0.000135s : 0.02% optimize.tuple_transform.none_parameter_eliminate : 0.000003s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000013s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_recomputation : 0.000103s : 0.02% optimize.cse_after_recomputation.cse : 0.000033s : 0.01% optimize.environ_conv : 0.000028s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000011s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000007s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000003s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000001s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000001s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000022s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000025s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000003s : 0.00% optimize.offloading_packed_experts : 0.000007s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000007s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000002s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000003s : 0.00% optimize.overlap_grad_ring_attention : 0.000007s : 0.00% optimize.overlap_grad_flash_sp : 0.000036s : 0.01% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000031s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000024s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000026s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000013s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000021s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% detach_backward : 0.000003s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000035s : 0.01% get_jit_bprop_graph : 0.000002s : 0.00% rewriter_after_jit_bprop_graph : 0.000006s : 0.00% opt_after_jit_grad : 0.000663s : 0.12% validate : 0.000092s : 0.02% Time group info: ------[substitution.] 0.000482 111 10.12% : 0.000049s : 6: substitution.arithmetic_simplify 0.82% : 0.000004s : 6: substitution.elim_not_effective 5.11% : 0.000025s : 6: substitution.float_tuple_getitem_switch 0.65% : 0.000003s : 6: substitution.fold_const_symbol 2.50% : 0.000012s : 8: substitution.graph_param_transform 41.74% : 0.000201s : 5: substitution.inline 1.85% : 0.000009s : 12: substitution.j_node_and_user_rematch 2.54% : 0.000012s : 4: substitution.minmaximum_grad 2.44% : 0.000012s : 12: substitution.remove_not_recompute_node 1.31% : 0.000006s : 2: substitution.replace_old_param 7.31% : 0.000035s : 8: substitution.tuple_list_convert_item_index_to_positive 2.96% : 0.000014s : 8: substitution.tuple_list_get_item_const_eliminator 4.84% : 0.000023s : 8: substitution.tuple_list_get_item_depend_reorder 11.28% : 0.000054s : 12: substitution.tuple_list_get_item_eliminator 4.55% : 0.000022s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 0.562134 2 99.38% : 0.558635s : 1: type_inference.infer 0.62% : 0.003498s : 1: type_inference.specialize ------[replace.] 0.000045 5 100.00% : 0.000045s : 5: replace.inline ------[match.] 0.000196 5 100.00% : 0.000196s : 5: match.inline ------[predicate.] 0.000402 2113 1.05% : 0.000004s : 21: predicate.accumulaten_eliminater 0.97% : 0.000004s : 8: predicate.ad_related_special_op_eliminate 0.69% : 0.000003s : 16: predicate.addn_check_dump 0.85% : 0.000003s : 21: predicate.addn_zero_filter 0.79% : 0.000003s : 21: predicate.adjust_all_reduce_mul_add 2.36% : 0.000009s : 37: predicate.arithmetic_simplify 0.97% : 0.000004s : 21: predicate.cast_eliminate 0.65% : 0.000003s : 16: predicate.check_bprop_eliminate 0.62% : 0.000002s : 16: predicate.compare_switch_simplify 0.20% : 0.000001s : 8: predicate.const_output_eliminate 0.66% : 0.000003s : 16: predicate.depend_value_elim 0.99% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 0.98% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.91% : 0.000004s : 21: predicate.dict_set_item_eliminator 1.09% : 0.000004s : 16: predicate.dumpgradient_eliminate 0.27% : 0.000001s : 8: predicate.elim_not_effective 0.62% : 0.000003s : 8: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.10% : 0.000004s : 29: predicate.environ_get_add_eliminate 1.12% : 0.000005s : 29: predicate.environ_get_depend_swap 1.74% : 0.000007s : 45: predicate.environ_get_eliminate 1.13% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.04% : 0.000004s : 26: predicate.exchange_switch_depend_value 1.68% : 0.000007s : 26: predicate.float_depend_g_call 0.64% : 0.000003s : 16: predicate.float_environ_get_switch 1.19% : 0.000005s : 24: predicate.float_tuple_getitem_switch 0.19% : 0.000001s : 8: predicate.fold_const_symbol 0.75% : 0.000003s : 16: predicate.get_grad_eliminate 0.28% : 0.000001s : 8: predicate.graph_param_transform 0.71% : 0.000003s : 16: predicate.incorporate_call 0.63% : 0.000003s : 16: predicate.incorporate_call_switch 5.87% : 0.000024s : 95: predicate.inline 0.97% : 0.000004s : 16: predicate.inline_without_move 0.37% : 0.000001s : 16: predicate.j_node_and_user_rematch 0.95% : 0.000004s : 16: predicate.less_batch_normalization 1.66% : 0.000007s : 37: predicate.list_to_tuple_eliminator_ 2.17% : 0.000009s : 58: predicate.load_eliminater 0.98% : 0.000004s : 8: predicate.loop_unroll_after_grad 1.64% : 0.000007s : 36: predicate.loop_unroll_before_grad 1.93% : 0.000008s : 37: predicate.make_slice_get_slice_eliminator 0.71% : 0.000003s : 16: predicate.merge_addn 0.66% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.77% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.83% : 0.000003s : 21: predicate.minmaximum_grad 1.15% : 0.000005s : 8: predicate.mutable_eliminate 0.39% : 0.000002s : 8: predicate.opt_reshape 0.43% : 0.000002s : 8: predicate.parallel_virtual_node 1.62% : 0.000007s : 26: predicate.partial_defer_inline 1.29% : 0.000005s : 29: predicate.partial_eliminate 0.84% : 0.000003s : 21: predicate.print_const_string_wrapper 0.68% : 0.000003s : 16: predicate.reduce_all_const_elim 1.12% : 0.000004s : 21: predicate.reduce_eliminate 2.33% : 0.000009s : 58: predicate.redundant_stop_gradient_eliminater 0.44% : 0.000002s : 16: predicate.remove_not_recompute_node 1.34% : 0.000005s : 37: predicate.replace_applicator 0.48% : 0.000002s : 16: predicate.replace_old_param 0.34% : 0.000001s : 8: predicate.reset_defer_inline 0.90% : 0.000004s : 21: predicate.reshape_eliminate 0.70% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.43% : 0.000002s : 8: predicate.row_tensor_eliminate 1.01% : 0.000004s : 16: predicate.same_eliminate 0.52% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.94% : 0.000004s : 16: predicate.shard_identity_eliminate 0.85% : 0.000003s : 16: predicate.special_op_eliminate 0.90% : 0.000004s : 16: predicate.specialize_transform 1.18% : 0.000005s : 16: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000004s : 16: predicate.stack_unstack_eliminate 0.41% : 0.000002s : 8: predicate.switch_call_monad_eliminater 1.12% : 0.000004s : 26: predicate.switch_defer_inline 1.77% : 0.000007s : 42: predicate.switch_layer_defer_inline 4.01% : 0.000016s : 86: predicate.switch_simplify 0.84% : 0.000003s : 21: predicate.tile_eliminate 0.85% : 0.000003s : 21: predicate.transpose_eliminate 1.79% : 0.000007s : 37: predicate.tuple_list_convert_item_index_to_positive 1.81% : 0.000007s : 37: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000007s : 37: predicate.tuple_list_get_item_depend_reorder 3.43% : 0.000014s : 53: predicate.tuple_list_get_item_eliminator 1.68% : 0.000007s : 37: predicate.tuple_list_get_set_item_eliminator 2.46% : 0.000010s : 53: predicate.tuple_list_set_item_eliminator 1.79% : 0.000007s : 37: predicate.tuple_to_list_eliminator_ 2.28% : 0.000009s : 58: predicate.updatestate_pure_node_eliminater 2.97% : 0.000012s : 74: predicate.updatestate_useless_node_eliminater 0.40% : 0.000002s : 8: predicate.value_based_eliminate 0.76% : 0.000003s : 16: predicate.virtual_dataset_eliminate 0.81% : 0.000003s : 16: predicate.virtual_output_eliminate 0.35% : 0.000001s : 8: predicate.virtual_view_grad_eliminate 0.49% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.005009 32 72.89% : 0.003651s : 25: func_graph_cloner_run.FuncGraphClonerGraph 27.11% : 0.001358s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 0.654072 192 0.00% : 0.000004s : 1: ForceFp32Comm 4.35% : 0.028427s : 1: add_attr 4.34% : 0.028408s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.02% : 0.000108s : 1: add_recomputation 0.00% : 0.000004s : 1: assign_add_opt 0.16% : 0.001059s : 1: auto_monad 0.01% : 0.000043s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.11% : 0.000752s : 1: bootstrap 0.01% : 0.000046s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000029s : 1: control_data_broadcast_order 0.01% : 0.000038s : 1: convert_after_rewriter 0.01% : 0.000049s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000006s : 1: detach_backward 0.00% : 0.000032s : 1: environ_conv 0.01% : 0.000034s : 1: event_method 0.00% : 0.000005s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000016s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000011s : 1: label_micro_interleaved_index 0.09% : 0.000618s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.12% : 0.000781s : 1: mutable_eliminate 0.00% : 0.000010s : 1: offloading_packed_experts 0.00% : 0.000028s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000032s : 1: opt.transform.mutable_eliminate 0.33% : 0.002165s : 78: opt.transform.opt_a 0.01% : 0.000059s : 1: opt.transform.opt_after_cconv 0.01% : 0.000049s : 1: opt.transform.opt_after_jit_grad 0.05% : 0.000300s : 28: opt.transform.opt_b 0.02% : 0.000146s : 2: opt.transform.opt_trans_graph 0.01% : 0.000079s : 4: opt.transform.symbol_engine_opt 1.31% : 0.008577s : 1: opt_a 0.03% : 0.000201s : 1: opt_after_cconv 0.10% : 0.000677s : 1: opt_after_jit_grad 0.08% : 0.000498s : 1: opt_b 1.88% : 0.012276s : 1: optimize 0.01% : 0.000038s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000012s : 1: order_py_execute_after_rewriter 0.01% : 0.000041s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000011s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000026s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000010s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: overlap_recompute_comm 0.00% : 0.000009s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.01% : 0.000064s : 1: pre_auto_parallel 0.01% : 0.000037s : 1: py_interpret_to_execute 0.00% : 0.000030s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.01% : 0.000072s : 1: remove_dup_value 0.51% : 0.003305s : 1: renormalize.infer 0.25% : 0.001603s : 1: renormalize.specialize 0.00% : 0.000007s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000010s : 1: rewriter_after_jit_bprop_graph 0.05% : 0.000327s : 1: rewriter_after_opt_a 0.02% : 0.000113s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000006s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000015s : 1: swap_dp_allreduce_reducescatter 0.02% : 0.000158s : 1: symbol_engine_optimizer 0.03% : 0.000186s : 1: tuple_transform 85.97% : 0.562284s : 1: type_inference mki_log delete old file:/home/jenkins/ascend/log/atb/atb_56161_20260129171752.log . [hook] pytest_runtest_teardown:test_swiglu[False-float32--1-shape0] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[False-float32--1-shape0],max_mem:100.0M [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:37:49.376.301 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 1.25923, [21] [bootstrap]: 0.0007029 [type_inference]: 1.13934 [event_method]: 2.684e-05 [auto_monad]: 9.939e-05 [graph_reusing]: 6.55002e-06 [inline]: 3.46999e-06 [add_attr]: 0.00670592, [1] [add_attr_with_inline]: 0.0066902, [1] [Cycle 1]: 7.91e-05, [2] [tag_attr]: 2.532e-05 [meta_addattr_fg_expand]: 6.04001e-06 [parallel-infer-symbol]: 3.98001e-06 [pre_auto_parallel]: 4.5e-05 [insert-virtual-dataset]: 2.43998e-06 [parallel-infer-symbol-second]: 8.50006e-07 [dataset_repeat_opt]: 2.24001e-06 [pipeline_split]: 1.81e-06 [optimize]: 0.111472, [53] [py_interpret_to_execute]: 2.882e-05 [rewriter_before_opt_a]: 9.699e-05 [opt_a]: 0.108234, [2] [Cycle 1]: 0.106904, [45] [expand_dump_flag]: 3.31999e-06 [switch_simplify]: 0.0980139 [loop_unroll]: 5.026e-05 [a_1]: 0.00109433 [with_stream_mark]: 3.639e-05 [recompute_prepare]: 1.918e-05 [updatestate_depend_eliminate]: 9.89001e-06 [updatestate_assign_eliminate]: 7.13e-06 [updatestate_loads_eliminate]: 6.12999e-06 [parameter_eliminate]: 2.69999e-06 [a_2]: 0.00022035 [accelerated_algorithm]: 1.539e-05 [shard]: 2.80997e-06 [meta_shard_fg_expand]: 4.23999e-06 [shard_inline]: 1.383e-05 [merge_send_recv]: 1.453e-05 [auto_parallel]: 1.491e-05 [parallel]: 3.604e-05 [flash_sp]: 1.24e-05 [merge_comm]: 7.43e-06 [allreduce_fusion]: 6.96001e-06 [matmul_add_comm_reduction]: 1.721e-05 [allreduce_slice_to_reducescatter]: 1.30999e-06 [virtual_shard_identity]: 1.794e-05 [virtual_dataset]: 1.346e-05 [get_grad_eliminate_]: 1.427e-05 [virtual_output]: 1.417e-05 [merge_forward]: 7.85e-06 [cell_reuse_recompute_pass]: 1.44998e-06 [offload_activation]: 1.668e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.943e-05 [merge_recompute_call_nodes]: 2.02999e-06 [before_grad]: 2.559e-05 [set_forward_comm_id_for_comm_node_pass]: 8.23001e-06 [meta_fg_expand]: 6.36e-06 [flash_sp_send_recv_attached]: 2.86e-06 [receive_attached]: 2.72001e-06 [after_resolve]: 1.8e-05 [a_after_grad]: 2.522e-05 [renormalize]: 0.00645684 [add_forward_monad_depend]: 1.105e-05 [auto_monad_grad]: 2.69001e-06 [auto_monad_eliminator]: 3.273e-05 [cse]: 7.295e-05 [a_3]: 0.00011067 [Cycle 2]: 0.00131453, [45] [expand_dump_flag]: 2.73e-06 [switch_simplify]: 1.646e-05 [loop_unroll]: 1.338e-05 [a_1]: 0.00038121 [with_stream_mark]: 2.43e-05 [recompute_prepare]: 1.425e-05 [updatestate_depend_eliminate]: 8.62e-06 [updatestate_assign_eliminate]: 5.86e-06 [updatestate_loads_eliminate]: 5.57001e-06 [parameter_eliminate]: 2.42001e-06 [a_2]: 0.00017872 [accelerated_algorithm]: 1.251e-05 [shard]: 2.38998e-06 [meta_shard_fg_expand]: 3.90998e-06 [shard_inline]: 1.206e-05 [merge_send_recv]: 1.309e-05 [auto_parallel]: 1.462e-05 [parallel]: 8.89998e-06 [flash_sp]: 4.05998e-06 [merge_comm]: 7.17002e-06 [allreduce_fusion]: 6.53998e-06 [matmul_add_comm_reduction]: 1.312e-05 [allreduce_slice_to_reducescatter]: 9.49978e-07 [virtual_shard_identity]: 1.411e-05 [virtual_dataset]: 1.288e-05 [get_grad_eliminate_]: 1.283e-05 [virtual_output]: 1.111e-05 [merge_forward]: 6.77002e-06 [cell_reuse_recompute_pass]: 3.08e-06 [offload_activation]: 1.498e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.318e-05 [merge_recompute_call_nodes]: 1.54e-06 [before_grad]: 2.089e-05 [set_forward_comm_id_for_comm_node_pass]: 7.18998e-06 [meta_fg_expand]: 5.96e-06 [flash_sp_send_recv_attached]: 1.94e-06 [receive_attached]: 2.11e-06 [after_resolve]: 1.62e-05 [a_after_grad]: 1.843e-05 [renormalize]: 9.00181e-08 [add_forward_monad_depend]: 1.71002e-06 [auto_monad_grad]: 1.48002e-06 [auto_monad_eliminator]: 1.448e-05 [cse]: 6.997e-05 [a_3]: 7.947e-05 [py_interpret_to_execute_after_opt_a]: 2.042e-05 [slice_cell_reuse_recomputed_activation]: 2.34999e-06 [rewriter_after_opt_a]: 0.00026326 [convert_after_rewriter]: 1.228e-05 [order_py_execute_after_rewriter]: 9.13002e-06 [mutable_eliminate]: 0.00075176 [opt_b]: 0.00043393, [1] [Cycle 1]: 0.00042563, [7] [b_1]: 0.00030642 [b_2]: 1.447e-05 [updatestate_depend_eliminate]: 1.068e-05 [updatestate_assign_eliminate]: 5.49e-06 [updatestate_loads_eliminate]: 5.00001e-06 [renormalize]: 6.39993e-07 [cse]: 4.38e-05 [optimize_parallel_all_gather_comm]: 2.811e-05 [overlap_param_gather]: 2.06e-06 [cconv]: 3.244e-05 [loop_unroll]: 0.00049646 [opt_after_cconv]: 0.00016706, [1] [Cycle 1]: 0.00016051, [7] [c_1]: 6.116e-05 [parameter_eliminate]: 2.60002e-06 [updatestate_depend_eliminate]: 9.89999e-06 [updatestate_assign_eliminate]: 6.46e-06 [updatestate_loads_eliminate]: 4.87e-06 [cse]: 3.672e-05 [renormalize]: 4.89992e-07 [remove_dup_value]: 5.707e-05 [tuple_transform]: 0.00016234, [1] [Cycle 1]: 0.00015737, [4] [d_1]: 0.00012022 [none_parameter_eliminate]: 1.82999e-06 [renormalize]: 3.09985e-07 [switch_simplify]: 1.474e-05 [partial_unused_args_eliminate]: 1.92999e-06 [add_recomputation]: 9.4e-05 [cse_after_recomputation]: 4.113e-05, [1] [Cycle 1]: 3.502e-05, [1] [cse]: 2.888e-05 [environ_conv]: 1.705e-05 [swap_dp_allreduce_reducescatter]: 1.011e-05 [bias_add_comm_swap]: 3.23e-06 [label_micro_interleaved_index]: 4.80999e-06 [label_fine_grained_interleaved_index]: 3.53e-06 [merge_cast_opt]: 1.36002e-06 [slice_recompute_activation]: 2.21e-06 [micro_interleaved_order_control]: 2.75002e-06 [assign_add_opt]: 1.31002e-06 [ForceFp32Comm]: 9.39996e-07 [remove_cast_before_assign_add]: 1.19e-06 [full_micro_interleaved_order_control]: 2.17001e-06 [reorder_send_recv_between_fp_bp]: 3.35e-06 [comm_op_add_attrs]: 1.14998e-06 [add_comm_op_reuse_tag]: 1.06997e-06 [interleave_split_concat_branches]: 1.75001e-06 [interleave_parallel_branches]: 1.27e-06 [overlap_opt_shard_in_pipeline]: 2.37001e-06 [overlap_opt_shard_grad_in_pipeline]: 2.08002e-06 [control_data_broadcast_order]: 2.295e-05 [grouped_pairwise_exchange_alltoall]: 1.84e-06 [offloading_packed_experts]: 6.38998e-06 [overlap_recompute_and_grad_model_parallel]: 7.06001e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.53002e-06 [overlap_recompute_allgather_and_fa_grad]: 1.50001e-06 [overlap_recompute_comm]: 2.16998e-06 [overlap_grad_ring_attention]: 6.96999e-06 [overlap_grad_flash_sp]: 3.277e-05 [begin_end_overlap_inline]: 6.50005e-07 [split_matmul_comm_elemetwise]: 2.24999e-06 [split_layernorm_comm]: 1.77999e-06 [handle_group_info]: 9.90025e-07 [symbol_engine_optimizer]: 0.00012322, [1] [Cycle 1]: 0.00011682, [6] [build]: 1.188e-05 [elim_shapecalc]: 1.74e-05 [elim_not_effective]: 2.445e-05 [opt_reshape]: 1.303e-05 [fold_const_symbol]: 2.001e-05 [renormalize]: 2.3999e-07 [detach_backward]: 2.31e-06 [pipeline_parallel_scheduler]: 1.71e-06 [auto_monad_reorder]: 2.763e-05 [get_jit_bprop_graph]: 2.70002e-06 [rewriter_after_jit_bprop_graph]: 4.82998e-06 [opt_after_jit_grad]: 0.0005168 [validate]: 6.612e-05 Sums bootstrap : 0.000703s : 0.06% type_inference : 1.139339s : 91.05% event_method : 0.000027s : 0.00% auto_monad : 0.000099s : 0.01% graph_reusing : 0.000007s : 0.00% inline : 0.000003s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000025s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000006s : 0.00% parallel-infer-symbol : 0.000004s : 0.00% pre_auto_parallel : 0.000045s : 0.00% insert-virtual-dataset : 0.000002s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000029s : 0.00% optimize.rewriter_before_opt_a : 0.000097s : 0.01% optimize.opt_a.expand_dump_flag : 0.000006s : 0.00% optimize.opt_a.switch_simplify : 0.098030s : 7.83% optimize.opt_a.loop_unroll : 0.000064s : 0.01% optimize.opt_a.a_1 : 0.001476s : 0.12% optimize.opt_a.with_stream_mark : 0.000061s : 0.00% optimize.opt_a.recompute_prepare : 0.000033s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000019s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000399s : 0.03% optimize.opt_a.accelerated_algorithm : 0.000028s : 0.00% optimize.opt_a.shard : 0.000005s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.00% optimize.opt_a.shard_inline : 0.000026s : 0.00% optimize.opt_a.merge_send_recv : 0.000028s : 0.00% optimize.opt_a.auto_parallel : 0.000030s : 0.00% optimize.opt_a.parallel : 0.000045s : 0.00% optimize.opt_a.flash_sp : 0.000016s : 0.00% optimize.opt_a.merge_comm : 0.000015s : 0.00% optimize.opt_a.allreduce_fusion : 0.000013s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000030s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000032s : 0.00% optimize.opt_a.virtual_dataset : 0.000026s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000027s : 0.00% optimize.opt_a.virtual_output : 0.000025s : 0.00% optimize.opt_a.merge_forward : 0.000015s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.offload_activation : 0.000032s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000053s : 0.00% optimize.opt_a.merge_recompute_call_nodes : 0.000004s : 0.00% optimize.opt_a.before_grad : 0.000046s : 0.00% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000015s : 0.00% optimize.opt_a.meta_fg_expand : 0.000012s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000034s : 0.00% optimize.opt_a.a_after_grad : 0.000044s : 0.00% optimize.opt_a.renormalize : 0.006457s : 0.52% optimize.opt_a.add_forward_monad_depend : 0.000013s : 0.00% optimize.opt_a.auto_monad_grad : 0.000004s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000047s : 0.00% optimize.opt_a.cse : 0.000143s : 0.01% optimize.opt_a.a_3 : 0.000190s : 0.02% optimize.py_interpret_to_execute_after_opt_a : 0.000020s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000263s : 0.02% optimize.convert_after_rewriter : 0.000012s : 0.00% optimize.order_py_execute_after_rewriter : 0.000009s : 0.00% optimize.mutable_eliminate : 0.000752s : 0.06% optimize.opt_b.b_1 : 0.000306s : 0.02% optimize.opt_b.b_2 : 0.000014s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000011s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000044s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000028s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000032s : 0.00% optimize.loop_unroll : 0.000496s : 0.04% optimize.opt_after_cconv.c_1 : 0.000061s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000003s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000010s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000037s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000057s : 0.00% optimize.tuple_transform.d_1 : 0.000120s : 0.01% optimize.tuple_transform.none_parameter_eliminate : 0.000002s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000015s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_recomputation : 0.000094s : 0.01% optimize.cse_after_recomputation.cse : 0.000029s : 0.00% optimize.environ_conv : 0.000017s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000010s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000005s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000004s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000001s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000023s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000006s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000007s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000002s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000002s : 0.00% optimize.overlap_grad_ring_attention : 0.000007s : 0.00% optimize.overlap_grad_flash_sp : 0.000033s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000012s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000017s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000024s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000013s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000020s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% detach_backward : 0.000002s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000028s : 0.00% get_jit_bprop_graph : 0.000003s : 0.00% rewriter_after_jit_bprop_graph : 0.000005s : 0.00% opt_after_jit_grad : 0.000517s : 0.04% validate : 0.000066s : 0.01% Time group info: ------[substitution.] 0.000495 111 9.35% : 0.000046s : 6: substitution.arithmetic_simplify 0.71% : 0.000004s : 6: substitution.elim_not_effective 2.39% : 0.000012s : 6: substitution.float_tuple_getitem_switch 0.59% : 0.000003s : 6: substitution.fold_const_symbol 2.12% : 0.000010s : 8: substitution.graph_param_transform 48.67% : 0.000241s : 5: substitution.inline 1.71% : 0.000008s : 12: substitution.j_node_and_user_rematch 2.45% : 0.000012s : 4: substitution.minmaximum_grad 2.25% : 0.000011s : 12: substitution.remove_not_recompute_node 1.12% : 0.000006s : 2: substitution.replace_old_param 6.67% : 0.000033s : 8: substitution.tuple_list_convert_item_index_to_positive 3.02% : 0.000015s : 8: substitution.tuple_list_get_item_const_eliminator 4.25% : 0.000021s : 8: substitution.tuple_list_get_item_depend_reorder 10.42% : 0.000052s : 12: substitution.tuple_list_get_item_eliminator 4.26% : 0.000021s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 1.139224 2 99.61% : 1.134811s : 1: type_inference.infer 0.39% : 0.004414s : 1: type_inference.specialize ------[replace.] 0.000053 5 100.00% : 0.000053s : 5: replace.inline ------[match.] 0.000237 5 100.00% : 0.000237s : 5: match.inline ------[predicate.] 0.000420 2113 0.86% : 0.000004s : 21: predicate.accumulaten_eliminater 0.75% : 0.000003s : 8: predicate.ad_related_special_op_eliminate 0.66% : 0.000003s : 16: predicate.addn_check_dump 0.92% : 0.000004s : 21: predicate.addn_zero_filter 0.88% : 0.000004s : 21: predicate.adjust_all_reduce_mul_add 2.31% : 0.000010s : 37: predicate.arithmetic_simplify 0.88% : 0.000004s : 21: predicate.cast_eliminate 0.73% : 0.000003s : 16: predicate.check_bprop_eliminate 0.67% : 0.000003s : 16: predicate.compare_switch_simplify 0.20% : 0.000001s : 8: predicate.const_output_eliminate 0.66% : 0.000003s : 16: predicate.depend_value_elim 0.88% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 1.05% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.82% : 0.000003s : 21: predicate.dict_set_item_eliminator 0.84% : 0.000004s : 16: predicate.dumpgradient_eliminate 0.21% : 0.000001s : 8: predicate.elim_not_effective 0.41% : 0.000002s : 8: predicate.elim_shapecalc_of_broadcastargs 1.16% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.25% : 0.000005s : 29: predicate.environ_get_add_eliminate 1.13% : 0.000005s : 29: predicate.environ_get_depend_swap 1.81% : 0.000008s : 45: predicate.environ_get_eliminate 1.11% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.03% : 0.000004s : 26: predicate.exchange_switch_depend_value 1.91% : 0.000008s : 26: predicate.float_depend_g_call 0.65% : 0.000003s : 16: predicate.float_environ_get_switch 1.05% : 0.000004s : 24: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 8: predicate.fold_const_symbol 0.81% : 0.000003s : 16: predicate.get_grad_eliminate 0.29% : 0.000001s : 8: predicate.graph_param_transform 0.67% : 0.000003s : 16: predicate.incorporate_call 0.62% : 0.000003s : 16: predicate.incorporate_call_switch 5.59% : 0.000023s : 95: predicate.inline 0.94% : 0.000004s : 16: predicate.inline_without_move 0.36% : 0.000002s : 16: predicate.j_node_and_user_rematch 0.90% : 0.000004s : 16: predicate.less_batch_normalization 1.65% : 0.000007s : 37: predicate.list_to_tuple_eliminator_ 2.30% : 0.000010s : 58: predicate.load_eliminater 0.86% : 0.000004s : 8: predicate.loop_unroll_after_grad 2.73% : 0.000011s : 36: predicate.loop_unroll_before_grad 1.69% : 0.000007s : 37: predicate.make_slice_get_slice_eliminator 0.74% : 0.000003s : 16: predicate.merge_addn 0.69% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.68% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.91% : 0.000004s : 21: predicate.minmaximum_grad 1.05% : 0.000004s : 8: predicate.mutable_eliminate 0.38% : 0.000002s : 8: predicate.opt_reshape 0.38% : 0.000002s : 8: predicate.parallel_virtual_node 1.36% : 0.000006s : 26: predicate.partial_defer_inline 1.21% : 0.000005s : 29: predicate.partial_eliminate 0.91% : 0.000004s : 21: predicate.print_const_string_wrapper 0.73% : 0.000003s : 16: predicate.reduce_all_const_elim 1.15% : 0.000005s : 21: predicate.reduce_eliminate 2.36% : 0.000010s : 58: predicate.redundant_stop_gradient_eliminater 0.43% : 0.000002s : 16: predicate.remove_not_recompute_node 1.19% : 0.000005s : 37: predicate.replace_applicator 0.39% : 0.000002s : 16: predicate.replace_old_param 0.26% : 0.000001s : 8: predicate.reset_defer_inline 0.94% : 0.000004s : 21: predicate.reshape_eliminate 0.73% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.43% : 0.000002s : 8: predicate.row_tensor_eliminate 0.90% : 0.000004s : 16: predicate.same_eliminate 0.49% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.84% : 0.000004s : 16: predicate.shard_identity_eliminate 0.75% : 0.000003s : 16: predicate.special_op_eliminate 0.79% : 0.000003s : 16: predicate.specialize_transform 0.93% : 0.000004s : 16: predicate.split_environ_get_set_with_tuple_value 0.84% : 0.000004s : 16: predicate.stack_unstack_eliminate 0.40% : 0.000002s : 8: predicate.switch_call_monad_eliminater 1.15% : 0.000005s : 26: predicate.switch_defer_inline 1.77% : 0.000007s : 42: predicate.switch_layer_defer_inline 5.85% : 0.000025s : 86: predicate.switch_simplify 0.90% : 0.000004s : 21: predicate.tile_eliminate 0.89% : 0.000004s : 21: predicate.transpose_eliminate 1.78% : 0.000007s : 37: predicate.tuple_list_convert_item_index_to_positive 1.90% : 0.000008s : 37: predicate.tuple_list_get_item_const_eliminator 1.71% : 0.000007s : 37: predicate.tuple_list_get_item_depend_reorder 3.14% : 0.000013s : 53: predicate.tuple_list_get_item_eliminator 1.74% : 0.000007s : 37: predicate.tuple_list_get_set_item_eliminator 2.49% : 0.000010s : 53: predicate.tuple_list_set_item_eliminator 1.48% : 0.000006s : 37: predicate.tuple_to_list_eliminator_ 2.24% : 0.000009s : 58: predicate.updatestate_pure_node_eliminater 2.96% : 0.000012s : 74: predicate.updatestate_useless_node_eliminater 0.38% : 0.000002s : 8: predicate.value_based_eliminate 0.75% : 0.000003s : 16: predicate.virtual_dataset_eliminate 0.78% : 0.000003s : 16: predicate.virtual_output_eliminate 0.33% : 0.000001s : 8: predicate.virtual_view_grad_eliminate 0.47% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.032842 32 94.35% : 0.030986s : 25: func_graph_cloner_run.FuncGraphClonerGraph 5.65% : 0.001856s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.484713 192 0.00% : 0.000005s : 1: ForceFp32Comm 0.45% : 0.006713s : 1: add_attr 0.45% : 0.006694s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000099s : 1: add_recomputation 0.00% : 0.000004s : 1: assign_add_opt 0.01% : 0.000105s : 1: auto_monad 0.00% : 0.000033s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000006s : 1: bias_add_comm_swap 0.05% : 0.000737s : 1: bootstrap 0.00% : 0.000036s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000027s : 1: control_data_broadcast_order 0.00% : 0.000017s : 1: convert_after_rewriter 0.00% : 0.000044s : 1: cse_after_recomputation 0.00% : 0.000007s : 1: dataset_repeat_opt 0.00% : 0.000006s : 1: detach_backward 0.00% : 0.000021s : 1: environ_conv 0.00% : 0.000036s : 1: event_method 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000010s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000004s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000007s : 1: label_fine_grained_interleaved_index 0.00% : 0.000008s : 1: label_micro_interleaved_index 0.03% : 0.000507s : 1: loop_unroll 0.00% : 0.000004s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.05% : 0.000762s : 1: mutable_eliminate 0.00% : 0.000010s : 1: offloading_packed_experts 0.00% : 0.000023s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000026s : 1: opt.transform.mutable_eliminate 6.76% : 0.100407s : 78: opt.transform.opt_a 0.00% : 0.000060s : 1: opt.transform.opt_after_cconv 0.00% : 0.000044s : 1: opt.transform.opt_after_jit_grad 0.02% : 0.000287s : 28: opt.transform.opt_b 0.01% : 0.000132s : 2: opt.transform.opt_trans_graph 0.00% : 0.000070s : 4: opt.transform.symbol_engine_opt 7.29% : 0.108238s : 1: opt_a 0.01% : 0.000172s : 1: opt_after_cconv 0.04% : 0.000527s : 1: opt_after_jit_grad 0.03% : 0.000438s : 1: opt_b 7.51% : 0.111479s : 1: optimize 0.00% : 0.000032s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000013s : 1: order_py_execute_after_rewriter 0.00% : 0.000036s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000011s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000005s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000005s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000010s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000005s : 1: overlap_recompute_comm 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000050s : 1: pre_auto_parallel 0.00% : 0.000033s : 1: py_interpret_to_execute 0.00% : 0.000024s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000061s : 1: remove_dup_value 0.27% : 0.003984s : 1: renormalize.infer 0.17% : 0.002456s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000009s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000270s : 1: rewriter_after_opt_a 0.01% : 0.000102s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000006s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000014s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000126s : 1: symbol_engine_optimizer 0.01% : 0.000165s : 1: tuple_transform 76.74% : 1.139369s : 1: type_inference . [hook] pytest_runtest_teardown:test_swiglu[False-float32--1-shape1] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[False-float32--1-shape1],max_mem:100.0M [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:37:52.686.170 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 1.28158, [21] [bootstrap]: 0.0839551 [type_inference]: 1.08888 [event_method]: 2.643e-05 [auto_monad]: 9.365e-05 [graph_reusing]: 6.90998e-06 [inline]: 2.96001e-06 [add_attr]: 0.094083, [1] [add_attr_with_inline]: 0.09406, [1] [Cycle 1]: 0.00011757, [2] [tag_attr]: 4.474e-05 [meta_addattr_fg_expand]: 1.35e-05 [parallel-infer-symbol]: 4.99e-06 [pre_auto_parallel]: 5.601e-05 [insert-virtual-dataset]: 2.73e-06 [parallel-infer-symbol-second]: 8.80013e-07 [dataset_repeat_opt]: 2.59999e-06 [pipeline_split]: 1.84e-06 [optimize]: 0.0135017, [53] [py_interpret_to_execute]: 5.316e-05 [rewriter_before_opt_a]: 0.00010676 [opt_a]: 0.00991866, [2] [Cycle 1]: 0.00846463, [45] [expand_dump_flag]: 3.42997e-06 [switch_simplify]: 4.676e-05 [loop_unroll]: 3.058e-05 [a_1]: 0.00107204 [with_stream_mark]: 2.702e-05 [recompute_prepare]: 2.007e-05 [updatestate_depend_eliminate]: 8.18999e-06 [updatestate_assign_eliminate]: 6.41e-06 [updatestate_loads_eliminate]: 5.78002e-06 [parameter_eliminate]: 2.34001e-06 [a_2]: 0.00021057 [accelerated_algorithm]: 1.678e-05 [shard]: 2.89999e-06 [meta_shard_fg_expand]: 4.32998e-06 [shard_inline]: 1.362e-05 [merge_send_recv]: 1.322e-05 [auto_parallel]: 1.174e-05 [parallel]: 3.032e-05 [flash_sp]: 1.073e-05 [merge_comm]: 7.40998e-06 [allreduce_fusion]: 7.05e-06 [matmul_add_comm_reduction]: 1.641e-05 [allreduce_slice_to_reducescatter]: 9.20001e-07 [virtual_shard_identity]: 1.579e-05 [virtual_dataset]: 1.327e-05 [get_grad_eliminate_]: 1.263e-05 [virtual_output]: 1.294e-05 [merge_forward]: 9.67999e-06 [cell_reuse_recompute_pass]: 1.52999e-06 [offload_activation]: 1.641e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.889e-05 [merge_recompute_call_nodes]: 1.55999e-06 [before_grad]: 2.386e-05 [set_forward_comm_id_for_comm_node_pass]: 7.5e-06 [meta_fg_expand]: 7.17002e-06 [flash_sp_send_recv_attached]: 2.79001e-06 [receive_attached]: 2.11e-06 [after_resolve]: 1.779e-05 [a_after_grad]: 2.286e-05 [renormalize]: 0.0054837 [add_forward_monad_depend]: 1.261e-05 [auto_monad_grad]: 3.03998e-06 [auto_monad_eliminator]: 3.27e-05 [cse]: 0.00068327 [a_3]: 0.00011998 [Cycle 2]: 0.00143831, [45] [expand_dump_flag]: 2.87002e-06 [switch_simplify]: 1.695e-05 [loop_unroll]: 1.357e-05 [a_1]: 0.00038544 [with_stream_mark]: 3.438e-05 [recompute_prepare]: 1.364e-05 [updatestate_depend_eliminate]: 8.75001e-06 [updatestate_assign_eliminate]: 5.91998e-06 [updatestate_loads_eliminate]: 6.04999e-06 [parameter_eliminate]: 2.31e-06 [a_2]: 0.00018112 [accelerated_algorithm]: 1.344e-05 [shard]: 2.88998e-06 [meta_shard_fg_expand]: 4.95999e-06 [shard_inline]: 1.196e-05 [merge_send_recv]: 1.414e-05 [auto_parallel]: 1.478e-05 [parallel]: 1.032e-05 [flash_sp]: 4.40999e-06 [merge_comm]: 7.35998e-06 [allreduce_fusion]: 6.51e-06 [matmul_add_comm_reduction]: 1.587e-05 [allreduce_slice_to_reducescatter]: 7.00005e-07 [virtual_shard_identity]: 1.473e-05 [virtual_dataset]: 1.22e-05 [get_grad_eliminate_]: 1.266e-05 [virtual_output]: 1.118e-05 [merge_forward]: 7.2e-06 [cell_reuse_recompute_pass]: 3.35e-06 [offload_activation]: 1.643e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.559e-05 [merge_recompute_call_nodes]: 1.64e-06 [before_grad]: 2.112e-05 [set_forward_comm_id_for_comm_node_pass]: 7.71999e-06 [meta_fg_expand]: 5.87999e-06 [flash_sp_send_recv_attached]: 1.85001e-06 [receive_attached]: 2.39001e-06 [after_resolve]: 1.898e-05 [a_after_grad]: 1.885e-05 [renormalize]: 1.59984e-07 [add_forward_monad_depend]: 2.81999e-06 [auto_monad_grad]: 3.02002e-06 [auto_monad_eliminator]: 2.637e-05 [cse]: 0.00010313 [a_3]: 8.272e-05 [py_interpret_to_execute_after_opt_a]: 2.736e-05 [slice_cell_reuse_recomputed_activation]: 2.17999e-06 [rewriter_after_opt_a]: 0.00031474 [convert_after_rewriter]: 1.526e-05 [order_py_execute_after_rewriter]: 9.20999e-06 [mutable_eliminate]: 0.00081612 [opt_b]: 0.00049242, [1] [Cycle 1]: 0.00048356, [7] [b_1]: 0.0003374 [b_2]: 1.496e-05 [updatestate_depend_eliminate]: 1.194e-05 [updatestate_assign_eliminate]: 5.96e-06 [updatestate_loads_eliminate]: 5.96e-06 [renormalize]: 7.59988e-07 [cse]: 6.146e-05 [optimize_parallel_all_gather_comm]: 2.964e-05 [overlap_param_gather]: 2.14999e-06 [cconv]: 3.859e-05 [loop_unroll]: 0.00055532 [opt_after_cconv]: 0.00017829, [1] [Cycle 1]: 0.00016984, [7] [c_1]: 6.2e-05 [parameter_eliminate]: 5.77999e-06 [updatestate_depend_eliminate]: 1.109e-05 [updatestate_assign_eliminate]: 5.34e-06 [updatestate_loads_eliminate]: 5.10999e-06 [cse]: 4.153e-05 [renormalize]: 4.10015e-07 [remove_dup_value]: 6.612e-05 [tuple_transform]: 0.00016802, [1] [Cycle 1]: 0.00016283, [4] [d_1]: 0.00012448 [none_parameter_eliminate]: 2.29001e-06 [renormalize]: 1.8999e-07 [switch_simplify]: 1.354e-05 [partial_unused_args_eliminate]: 2.26e-06 [add_recomputation]: 0.0001 [cse_after_recomputation]: 4.391e-05, [1] [Cycle 1]: 3.794e-05, [1] [cse]: 3.114e-05 [environ_conv]: 1.567e-05 [swap_dp_allreduce_reducescatter]: 1.014e-05 [bias_add_comm_swap]: 3.5e-06 [label_micro_interleaved_index]: 5.35999e-06 [label_fine_grained_interleaved_index]: 3.2e-06 [merge_cast_opt]: 1.42999e-06 [slice_recompute_activation]: 2.44001e-06 [micro_interleaved_order_control]: 2.71e-06 [assign_add_opt]: 1.29998e-06 [ForceFp32Comm]: 9.79984e-07 [remove_cast_before_assign_add]: 1.29003e-06 [full_micro_interleaved_order_control]: 2.78e-06 [reorder_send_recv_between_fp_bp]: 3.13998e-06 [comm_op_add_attrs]: 1.32e-06 [add_comm_op_reuse_tag]: 1.05001e-06 [interleave_split_concat_branches]: 1.89999e-06 [interleave_parallel_branches]: 1.14998e-06 [overlap_opt_shard_in_pipeline]: 1.37999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.58e-06 [control_data_broadcast_order]: 2.295e-05 [grouped_pairwise_exchange_alltoall]: 1.63002e-06 [offloading_packed_experts]: 7.11999e-06 [overlap_recompute_and_grad_model_parallel]: 7.3e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.35001e-06 [overlap_recompute_allgather_and_fa_grad]: 1.78002e-06 [overlap_recompute_comm]: 2.69001e-06 [overlap_grad_ring_attention]: 5.76998e-06 [overlap_grad_flash_sp]: 3.239e-05 [begin_end_overlap_inline]: 8.2e-07 [split_matmul_comm_elemetwise]: 2.54001e-06 [split_layernorm_comm]: 2.01e-06 [handle_group_info]: 9.89996e-07 [symbol_engine_optimizer]: 0.00012915, [1] [Cycle 1]: 0.00012424, [6] [build]: 1.344e-05 [elim_shapecalc]: 1.929e-05 [elim_not_effective]: 2.431e-05 [opt_reshape]: 1.238e-05 [fold_const_symbol]: 2.12e-05 [renormalize]: 2.19996e-07 [detach_backward]: 3.33998e-06 [pipeline_parallel_scheduler]: 1.64998e-06 [auto_monad_reorder]: 2.815e-05 [get_jit_bprop_graph]: 2.34001e-06 [rewriter_after_jit_bprop_graph]: 6.51999e-06 [opt_after_jit_grad]: 0.0005857 [validate]: 7.581e-05 Sums bootstrap : 0.083955s : 7.08% type_inference : 1.088881s : 91.80% event_method : 0.000026s : 0.00% auto_monad : 0.000094s : 0.01% graph_reusing : 0.000007s : 0.00% inline : 0.000003s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000045s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000013s : 0.00% parallel-infer-symbol : 0.000005s : 0.00% pre_auto_parallel : 0.000056s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000003s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000053s : 0.00% optimize.rewriter_before_opt_a : 0.000107s : 0.01% optimize.opt_a.expand_dump_flag : 0.000006s : 0.00% optimize.opt_a.switch_simplify : 0.000064s : 0.01% optimize.opt_a.loop_unroll : 0.000044s : 0.00% optimize.opt_a.a_1 : 0.001457s : 0.12% optimize.opt_a.with_stream_mark : 0.000061s : 0.01% optimize.opt_a.recompute_prepare : 0.000034s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000012s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000392s : 0.03% optimize.opt_a.accelerated_algorithm : 0.000030s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000009s : 0.00% optimize.opt_a.shard_inline : 0.000026s : 0.00% optimize.opt_a.merge_send_recv : 0.000027s : 0.00% optimize.opt_a.auto_parallel : 0.000027s : 0.00% optimize.opt_a.parallel : 0.000041s : 0.00% optimize.opt_a.flash_sp : 0.000015s : 0.00% optimize.opt_a.merge_comm : 0.000015s : 0.00% optimize.opt_a.allreduce_fusion : 0.000014s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000032s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000031s : 0.00% optimize.opt_a.virtual_dataset : 0.000025s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000025s : 0.00% optimize.opt_a.virtual_output : 0.000024s : 0.00% optimize.opt_a.merge_forward : 0.000017s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.offload_activation : 0.000033s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000054s : 0.00% optimize.opt_a.merge_recompute_call_nodes : 0.000003s : 0.00% optimize.opt_a.before_grad : 0.000045s : 0.00% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000015s : 0.00% optimize.opt_a.meta_fg_expand : 0.000013s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000037s : 0.00% optimize.opt_a.a_after_grad : 0.000042s : 0.00% optimize.opt_a.renormalize : 0.005484s : 0.46% optimize.opt_a.add_forward_monad_depend : 0.000015s : 0.00% optimize.opt_a.auto_monad_grad : 0.000006s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000059s : 0.00% optimize.opt_a.cse : 0.000786s : 0.07% optimize.opt_a.a_3 : 0.000203s : 0.02% optimize.py_interpret_to_execute_after_opt_a : 0.000027s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000315s : 0.03% optimize.convert_after_rewriter : 0.000015s : 0.00% optimize.order_py_execute_after_rewriter : 0.000009s : 0.00% optimize.mutable_eliminate : 0.000816s : 0.07% optimize.opt_b.b_1 : 0.000337s : 0.03% optimize.opt_b.b_2 : 0.000015s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000012s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000061s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000030s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000039s : 0.00% optimize.loop_unroll : 0.000555s : 0.05% optimize.opt_after_cconv.c_1 : 0.000062s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000011s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000042s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000066s : 0.01% optimize.tuple_transform.d_1 : 0.000124s : 0.01% optimize.tuple_transform.none_parameter_eliminate : 0.000002s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000014s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_recomputation : 0.000100s : 0.01% optimize.cse_after_recomputation.cse : 0.000031s : 0.00% optimize.environ_conv : 0.000016s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000010s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000005s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000001s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000023s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000007s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000007s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000003s : 0.00% optimize.overlap_grad_ring_attention : 0.000006s : 0.00% optimize.overlap_grad_flash_sp : 0.000032s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000013s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000019s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000024s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000012s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000021s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% detach_backward : 0.000003s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000028s : 0.00% get_jit_bprop_graph : 0.000002s : 0.00% rewriter_after_jit_bprop_graph : 0.000007s : 0.00% opt_after_jit_grad : 0.000586s : 0.05% validate : 0.000076s : 0.01% Time group info: ------[substitution.] 0.000498 111 9.97% : 0.000050s : 6: substitution.arithmetic_simplify 0.91% : 0.000005s : 6: substitution.elim_not_effective 2.58% : 0.000013s : 6: substitution.float_tuple_getitem_switch 0.63% : 0.000003s : 6: substitution.fold_const_symbol 2.03% : 0.000010s : 8: substitution.graph_param_transform 45.74% : 0.000228s : 5: substitution.inline 1.69% : 0.000008s : 12: substitution.j_node_and_user_rematch 2.15% : 0.000011s : 4: substitution.minmaximum_grad 2.48% : 0.000012s : 12: substitution.remove_not_recompute_node 1.16% : 0.000006s : 2: substitution.replace_old_param 7.13% : 0.000035s : 8: substitution.tuple_list_convert_item_index_to_positive 3.26% : 0.000016s : 8: substitution.tuple_list_get_item_const_eliminator 4.55% : 0.000023s : 8: substitution.tuple_list_get_item_depend_reorder 11.14% : 0.000055s : 12: substitution.tuple_list_get_item_eliminator 4.57% : 0.000023s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 1.088774 2 99.63% : 1.084793s : 1: type_inference.infer 0.37% : 0.003980s : 1: type_inference.specialize ------[replace.] 0.000049 5 100.00% : 0.000049s : 5: replace.inline ------[match.] 0.000224 5 100.00% : 0.000224s : 5: match.inline ------[predicate.] 0.000404 2113 0.91% : 0.000004s : 21: predicate.accumulaten_eliminater 0.84% : 0.000003s : 8: predicate.ad_related_special_op_eliminate 0.65% : 0.000003s : 16: predicate.addn_check_dump 0.91% : 0.000004s : 21: predicate.addn_zero_filter 0.82% : 0.000003s : 21: predicate.adjust_all_reduce_mul_add 2.56% : 0.000010s : 37: predicate.arithmetic_simplify 1.01% : 0.000004s : 21: predicate.cast_eliminate 0.80% : 0.000003s : 16: predicate.check_bprop_eliminate 0.67% : 0.000003s : 16: predicate.compare_switch_simplify 0.21% : 0.000001s : 8: predicate.const_output_eliminate 0.74% : 0.000003s : 16: predicate.depend_value_elim 0.89% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 1.02% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.95% : 0.000004s : 21: predicate.dict_set_item_eliminator 1.05% : 0.000004s : 16: predicate.dumpgradient_eliminate 0.21% : 0.000001s : 8: predicate.elim_not_effective 0.41% : 0.000002s : 8: predicate.elim_shapecalc_of_broadcastargs 1.22% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.15% : 0.000005s : 29: predicate.environ_get_add_eliminate 1.14% : 0.000005s : 29: predicate.environ_get_depend_swap 1.92% : 0.000008s : 45: predicate.environ_get_eliminate 1.18% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.06% : 0.000004s : 26: predicate.exchange_switch_depend_value 1.80% : 0.000007s : 26: predicate.float_depend_g_call 0.66% : 0.000003s : 16: predicate.float_environ_get_switch 1.15% : 0.000005s : 24: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 8: predicate.fold_const_symbol 0.73% : 0.000003s : 16: predicate.get_grad_eliminate 0.22% : 0.000001s : 8: predicate.graph_param_transform 0.71% : 0.000003s : 16: predicate.incorporate_call 0.65% : 0.000003s : 16: predicate.incorporate_call_switch 5.83% : 0.000024s : 95: predicate.inline 0.98% : 0.000004s : 16: predicate.inline_without_move 0.35% : 0.000001s : 16: predicate.j_node_and_user_rematch 0.99% : 0.000004s : 16: predicate.less_batch_normalization 1.81% : 0.000007s : 37: predicate.list_to_tuple_eliminator_ 2.29% : 0.000009s : 58: predicate.load_eliminater 1.03% : 0.000004s : 8: predicate.loop_unroll_after_grad 1.65% : 0.000007s : 36: predicate.loop_unroll_before_grad 1.66% : 0.000007s : 37: predicate.make_slice_get_slice_eliminator 0.69% : 0.000003s : 16: predicate.merge_addn 0.76% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.70% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.85% : 0.000003s : 21: predicate.minmaximum_grad 1.15% : 0.000005s : 8: predicate.mutable_eliminate 0.38% : 0.000002s : 8: predicate.opt_reshape 0.40% : 0.000002s : 8: predicate.parallel_virtual_node 1.37% : 0.000006s : 26: predicate.partial_defer_inline 1.29% : 0.000005s : 29: predicate.partial_eliminate 0.85% : 0.000003s : 21: predicate.print_const_string_wrapper 0.69% : 0.000003s : 16: predicate.reduce_all_const_elim 1.14% : 0.000005s : 21: predicate.reduce_eliminate 2.33% : 0.000009s : 58: predicate.redundant_stop_gradient_eliminater 0.39% : 0.000002s : 16: predicate.remove_not_recompute_node 1.22% : 0.000005s : 37: predicate.replace_applicator 0.47% : 0.000002s : 16: predicate.replace_old_param 0.24% : 0.000001s : 8: predicate.reset_defer_inline 0.94% : 0.000004s : 21: predicate.reshape_eliminate 0.69% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.43% : 0.000002s : 8: predicate.row_tensor_eliminate 1.29% : 0.000005s : 16: predicate.same_eliminate 0.51% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.86% : 0.000003s : 16: predicate.shard_identity_eliminate 0.84% : 0.000003s : 16: predicate.special_op_eliminate 0.82% : 0.000003s : 16: predicate.specialize_transform 1.00% : 0.000004s : 16: predicate.split_environ_get_set_with_tuple_value 0.85% : 0.000003s : 16: predicate.stack_unstack_eliminate 0.37% : 0.000002s : 8: predicate.switch_call_monad_eliminater 1.21% : 0.000005s : 26: predicate.switch_defer_inline 1.87% : 0.000008s : 42: predicate.switch_layer_defer_inline 3.98% : 0.000016s : 86: predicate.switch_simplify 0.89% : 0.000004s : 21: predicate.tile_eliminate 0.87% : 0.000004s : 21: predicate.transpose_eliminate 1.92% : 0.000008s : 37: predicate.tuple_list_convert_item_index_to_positive 1.92% : 0.000008s : 37: predicate.tuple_list_get_item_const_eliminator 1.64% : 0.000007s : 37: predicate.tuple_list_get_item_depend_reorder 3.17% : 0.000013s : 53: predicate.tuple_list_get_item_eliminator 1.74% : 0.000007s : 37: predicate.tuple_list_get_set_item_eliminator 2.45% : 0.000010s : 53: predicate.tuple_list_set_item_eliminator 1.65% : 0.000007s : 37: predicate.tuple_to_list_eliminator_ 2.23% : 0.000009s : 58: predicate.updatestate_pure_node_eliminater 3.23% : 0.000013s : 74: predicate.updatestate_useless_node_eliminater 0.40% : 0.000002s : 8: predicate.value_based_eliminate 0.76% : 0.000003s : 16: predicate.virtual_dataset_eliminate 0.74% : 0.000003s : 16: predicate.virtual_output_eliminate 0.32% : 0.000001s : 8: predicate.virtual_view_grad_eliminate 0.42% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.064741 32 97.74% : 0.063281s : 25: func_graph_cloner_run.FuncGraphClonerGraph 2.26% : 0.001460s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.397509 192 0.00% : 0.000004s : 1: ForceFp32Comm 6.73% : 0.094102s : 1: add_attr 6.73% : 0.094065s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000106s : 1: add_recomputation 0.00% : 0.000005s : 1: assign_add_opt 0.01% : 0.000100s : 1: auto_monad 0.00% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000008s : 1: bias_add_comm_swap 6.01% : 0.084014s : 1: bootstrap 0.00% : 0.000043s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000027s : 1: control_data_broadcast_order 0.00% : 0.000020s : 1: convert_after_rewriter 0.00% : 0.000047s : 1: cse_after_recomputation 0.00% : 0.000007s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: detach_backward 0.00% : 0.000019s : 1: environ_conv 0.00% : 0.000035s : 1: event_method 0.00% : 0.000007s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000011s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000008s : 1: label_micro_interleaved_index 0.04% : 0.000567s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.06% : 0.000829s : 1: mutable_eliminate 0.00% : 0.000010s : 1: offloading_packed_experts 0.00% : 0.000024s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000030s : 1: opt.transform.mutable_eliminate 0.17% : 0.002404s : 78: opt.transform.opt_a 0.00% : 0.000061s : 1: opt.transform.opt_after_cconv 0.00% : 0.000047s : 1: opt.transform.opt_after_jit_grad 0.02% : 0.000313s : 28: opt.transform.opt_b 0.01% : 0.000136s : 2: opt.transform.opt_trans_graph 0.01% : 0.000072s : 4: opt.transform.symbol_engine_opt 0.71% : 0.009923s : 1: opt_a 0.01% : 0.000182s : 1: opt_after_cconv 0.04% : 0.000599s : 1: opt_after_jit_grad 0.04% : 0.000497s : 1: opt_b 0.97% : 0.013508s : 1: optimize 0.00% : 0.000033s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000013s : 1: order_py_execute_after_rewriter 0.00% : 0.000036s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000010s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000010s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: overlap_recompute_comm 0.00% : 0.000010s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000061s : 1: pre_auto_parallel 0.00% : 0.000058s : 1: py_interpret_to_execute 0.00% : 0.000032s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.01% : 0.000070s : 1: remove_dup_value 0.27% : 0.003799s : 1: renormalize.infer 0.12% : 0.001670s : 1: renormalize.specialize 0.00% : 0.000007s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000010s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000324s : 1: rewriter_after_opt_a 0.01% : 0.000112s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000013s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000132s : 1: symbol_engine_optimizer 0.01% : 0.000171s : 1: tuple_transform 77.92% : 1.088921s : 1: type_inference . [hook] pytest_runtest_teardown:test_swiglu[False-float16--1-shape0] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[False-float16--1-shape0],max_mem:100.0M [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:37:55.977.014 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 1.17981, [21] [bootstrap]: 0.00083148 [type_inference]: 1.0539 [event_method]: 2.363e-05 [auto_monad]: 8.836e-05 [graph_reusing]: 6.73e-06 [inline]: 3.05002e-06 [add_attr]: 0.109909, [1] [add_attr_with_inline]: 0.109891, [1] [Cycle 1]: 9.032e-05, [2] [tag_attr]: 2.756e-05 [meta_addattr_fg_expand]: 6.36e-06 [parallel-infer-symbol]: 3.95e-06 [pre_auto_parallel]: 5.097e-05 [insert-virtual-dataset]: 2.54999e-06 [parallel-infer-symbol-second]: 7.80012e-07 [dataset_repeat_opt]: 2.51998e-06 [pipeline_split]: 1.71e-06 [optimize]: 0.0140119, [53] [py_interpret_to_execute]: 4.364e-05 [rewriter_before_opt_a]: 0.00010474 [opt_a]: 0.0103263, [2] [Cycle 1]: 0.00840414, [45] [expand_dump_flag]: 3.37002e-06 [switch_simplify]: 4.854e-05 [loop_unroll]: 3.016e-05 [a_1]: 0.00108656 [with_stream_mark]: 2.555e-05 [recompute_prepare]: 1.826e-05 [updatestate_depend_eliminate]: 8.33001e-06 [updatestate_assign_eliminate]: 6.66999e-06 [updatestate_loads_eliminate]: 5.89e-06 [parameter_eliminate]: 2.18998e-06 [a_2]: 0.00021024 [accelerated_algorithm]: 1.489e-05 [shard]: 3.07002e-06 [meta_shard_fg_expand]: 3.71999e-06 [shard_inline]: 1.256e-05 [merge_send_recv]: 1.426e-05 [auto_parallel]: 1.132e-05 [parallel]: 3.069e-05 [flash_sp]: 1.22e-05 [merge_comm]: 7.06999e-06 [allreduce_fusion]: 6.91001e-06 [matmul_add_comm_reduction]: 1.653e-05 [allreduce_slice_to_reducescatter]: 9.00007e-07 [virtual_shard_identity]: 1.685e-05 [virtual_dataset]: 1.331e-05 [get_grad_eliminate_]: 1.424e-05 [virtual_output]: 1.353e-05 [merge_forward]: 8.17e-06 [cell_reuse_recompute_pass]: 1.96e-06 [offload_activation]: 1.727e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.859e-05 [merge_recompute_call_nodes]: 1.52001e-06 [before_grad]: 2.358e-05 [set_forward_comm_id_for_comm_node_pass]: 8.11002e-06 [meta_fg_expand]: 5.76e-06 [flash_sp_send_recv_attached]: 3.21999e-06 [receive_attached]: 2.83e-06 [after_resolve]: 1.894e-05 [a_after_grad]: 2.188e-05 [renormalize]: 0.0060531 [add_forward_monad_depend]: 1.224e-05 [auto_monad_grad]: 3.47002e-06 [auto_monad_eliminator]: 3.329e-05 [cse]: 7.681e-05 [a_3]: 0.00010857 [Cycle 2]: 0.00190651, [45] [expand_dump_flag]: 2.64001e-06 [switch_simplify]: 1.656e-05 [loop_unroll]: 1.308e-05 [a_1]: 0.00037487 [with_stream_mark]: 2.439e-05 [recompute_prepare]: 1.354e-05 [updatestate_depend_eliminate]: 8.92999e-06 [updatestate_assign_eliminate]: 5.95002e-06 [updatestate_loads_eliminate]: 5.80002e-06 [parameter_eliminate]: 2.44001e-06 [a_2]: 0.000179 [accelerated_algorithm]: 1.396e-05 [shard]: 2.98998e-06 [meta_shard_fg_expand]: 3.47002e-06 [shard_inline]: 1.262e-05 [merge_send_recv]: 1.311e-05 [auto_parallel]: 1.443e-05 [parallel]: 9.68002e-06 [flash_sp]: 4.91002e-06 [merge_comm]: 7.77998e-06 [allreduce_fusion]: 6.36998e-06 [matmul_add_comm_reduction]: 1.461e-05 [allreduce_slice_to_reducescatter]: 6.89994e-07 [virtual_shard_identity]: 1.416e-05 [virtual_dataset]: 1.222e-05 [get_grad_eliminate_]: 1.284e-05 [virtual_output]: 1.112e-05 [merge_forward]: 7.28999e-06 [cell_reuse_recompute_pass]: 3.53999e-06 [offload_activation]: 1.583e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.488e-05 [merge_recompute_call_nodes]: 2.05002e-06 [before_grad]: 2.212e-05 [set_forward_comm_id_for_comm_node_pass]: 7.90998e-06 [meta_fg_expand]: 6.00002e-06 [flash_sp_send_recv_attached]: 2.66e-06 [receive_attached]: 2.83998e-06 [after_resolve]: 1.726e-05 [a_after_grad]: 1.852e-05 [renormalize]: 6.00121e-08 [add_forward_monad_depend]: 3.35e-06 [auto_monad_grad]: 2.17001e-06 [auto_monad_eliminator]: 1.793e-05 [cse]: 0.00058032 [a_3]: 9.602e-05 [py_interpret_to_execute_after_opt_a]: 2.835e-05 [slice_cell_reuse_recomputed_activation]: 2.35002e-06 [rewriter_after_opt_a]: 0.0003105 [convert_after_rewriter]: 1.473e-05 [order_py_execute_after_rewriter]: 9.31e-06 [mutable_eliminate]: 0.00084934 [opt_b]: 0.00047902, [1] [Cycle 1]: 0.00047007, [7] [b_1]: 0.00032123 [b_2]: 1.577e-05 [updatestate_depend_eliminate]: 1.278e-05 [updatestate_assign_eliminate]: 6.06e-06 [updatestate_loads_eliminate]: 5.82001e-06 [renormalize]: 1.34e-06 [cse]: 6.126e-05 [optimize_parallel_all_gather_comm]: 2.935e-05 [overlap_param_gather]: 2.14e-06 [cconv]: 3.97e-05 [loop_unroll]: 0.0006098 [opt_after_cconv]: 0.00018405, [1] [Cycle 1]: 0.00017523, [7] [c_1]: 6.326e-05 [parameter_eliminate]: 5.56e-06 [updatestate_depend_eliminate]: 1.213e-05 [updatestate_assign_eliminate]: 5.32001e-06 [updatestate_loads_eliminate]: 5.04e-06 [cse]: 4.461e-05 [renormalize]: 1.35001e-06 [remove_dup_value]: 6.441e-05 [tuple_transform]: 0.00017492, [1] [Cycle 1]: 0.00016902, [4] [d_1]: 0.00012853 [none_parameter_eliminate]: 2.11e-06 [renormalize]: 2.19996e-07 [switch_simplify]: 1.383e-05 [partial_unused_args_eliminate]: 2.37001e-06 [add_recomputation]: 9.976e-05 [cse_after_recomputation]: 4.731e-05, [1] [Cycle 1]: 4.075e-05, [1] [cse]: 3.253e-05 [environ_conv]: 1.648e-05 [swap_dp_allreduce_reducescatter]: 1.05e-05 [bias_add_comm_swap]: 3.16001e-06 [label_micro_interleaved_index]: 6.29001e-06 [label_fine_grained_interleaved_index]: 2.84001e-06 [merge_cast_opt]: 1.60001e-06 [slice_recompute_activation]: 2.34001e-06 [micro_interleaved_order_control]: 2.83e-06 [assign_add_opt]: 1.27e-06 [ForceFp32Comm]: 1.02998e-06 [remove_cast_before_assign_add]: 1.50999e-06 [full_micro_interleaved_order_control]: 2.39999e-06 [reorder_send_recv_between_fp_bp]: 3.26001e-06 [comm_op_add_attrs]: 1.12e-06 [add_comm_op_reuse_tag]: 1.05001e-06 [interleave_split_concat_branches]: 1.55001e-06 [interleave_parallel_branches]: 1.17999e-06 [overlap_opt_shard_in_pipeline]: 1.53002e-06 [overlap_opt_shard_grad_in_pipeline]: 1.80001e-06 [control_data_broadcast_order]: 2.38e-05 [grouped_pairwise_exchange_alltoall]: 1.81e-06 [offloading_packed_experts]: 7.55e-06 [overlap_recompute_and_grad_model_parallel]: 8.48999e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.84e-06 [overlap_recompute_allgather_and_fa_grad]: 1.54e-06 [overlap_recompute_comm]: 2.27999e-06 [overlap_grad_ring_attention]: 6.39001e-06 [overlap_grad_flash_sp]: 3.287e-05 [begin_end_overlap_inline]: 6.19999e-07 [split_matmul_comm_elemetwise]: 2.98e-06 [split_layernorm_comm]: 1.80001e-06 [handle_group_info]: 1.04e-06 [symbol_engine_optimizer]: 0.00014101, [1] [Cycle 1]: 0.00013549, [6] [build]: 1.393e-05 [elim_shapecalc]: 2.261e-05 [elim_not_effective]: 2.747e-05 [opt_reshape]: 1.368e-05 [fold_const_symbol]: 2.217e-05 [renormalize]: 6.89994e-07 [detach_backward]: 3.73999e-06 [pipeline_parallel_scheduler]: 1.52999e-06 [auto_monad_reorder]: 3.108e-05 [get_jit_bprop_graph]: 1.92001e-06 [rewriter_after_jit_bprop_graph]: 5.91e-06 [opt_after_jit_grad]: 0.00064979 [validate]: 8.167e-05 Sums bootstrap : 0.000831s : 0.08% type_inference : 1.053901s : 98.62% event_method : 0.000024s : 0.00% auto_monad : 0.000088s : 0.01% graph_reusing : 0.000007s : 0.00% inline : 0.000003s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000028s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000006s : 0.00% parallel-infer-symbol : 0.000004s : 0.00% pre_auto_parallel : 0.000051s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000003s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000044s : 0.00% optimize.rewriter_before_opt_a : 0.000105s : 0.01% optimize.opt_a.expand_dump_flag : 0.000006s : 0.00% optimize.opt_a.switch_simplify : 0.000065s : 0.01% optimize.opt_a.loop_unroll : 0.000043s : 0.00% optimize.opt_a.a_1 : 0.001461s : 0.14% optimize.opt_a.with_stream_mark : 0.000050s : 0.00% optimize.opt_a.recompute_prepare : 0.000032s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000389s : 0.04% optimize.opt_a.accelerated_algorithm : 0.000029s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.00% optimize.opt_a.shard_inline : 0.000025s : 0.00% optimize.opt_a.merge_send_recv : 0.000027s : 0.00% optimize.opt_a.auto_parallel : 0.000026s : 0.00% optimize.opt_a.parallel : 0.000040s : 0.00% optimize.opt_a.flash_sp : 0.000017s : 0.00% optimize.opt_a.merge_comm : 0.000015s : 0.00% optimize.opt_a.allreduce_fusion : 0.000013s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000031s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000031s : 0.00% optimize.opt_a.virtual_dataset : 0.000026s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000027s : 0.00% optimize.opt_a.virtual_output : 0.000025s : 0.00% optimize.opt_a.merge_forward : 0.000015s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.offload_activation : 0.000033s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000053s : 0.01% optimize.opt_a.merge_recompute_call_nodes : 0.000004s : 0.00% optimize.opt_a.before_grad : 0.000046s : 0.00% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000016s : 0.00% optimize.opt_a.meta_fg_expand : 0.000012s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.00% optimize.opt_a.receive_attached : 0.000006s : 0.00% optimize.opt_a.after_resolve : 0.000036s : 0.00% optimize.opt_a.a_after_grad : 0.000040s : 0.00% optimize.opt_a.renormalize : 0.006053s : 0.57% optimize.opt_a.add_forward_monad_depend : 0.000016s : 0.00% optimize.opt_a.auto_monad_grad : 0.000006s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000051s : 0.00% optimize.opt_a.cse : 0.000657s : 0.06% optimize.opt_a.a_3 : 0.000205s : 0.02% optimize.py_interpret_to_execute_after_opt_a : 0.000028s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000311s : 0.03% optimize.convert_after_rewriter : 0.000015s : 0.00% optimize.order_py_execute_after_rewriter : 0.000009s : 0.00% optimize.mutable_eliminate : 0.000849s : 0.08% optimize.opt_b.b_1 : 0.000321s : 0.03% optimize.opt_b.b_2 : 0.000016s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000013s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000061s : 0.01% optimize.optimize_parallel_all_gather_comm : 0.000029s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000040s : 0.00% optimize.loop_unroll : 0.000610s : 0.06% optimize.opt_after_cconv.c_1 : 0.000063s : 0.01% optimize.opt_after_cconv.parameter_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000012s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000045s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000064s : 0.01% optimize.tuple_transform.d_1 : 0.000129s : 0.01% optimize.tuple_transform.none_parameter_eliminate : 0.000002s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000014s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_recomputation : 0.000100s : 0.01% optimize.cse_after_recomputation.cse : 0.000033s : 0.00% optimize.environ_conv : 0.000016s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000010s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000006s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000001s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000002s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000002s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000024s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000008s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000008s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000002s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000002s : 0.00% optimize.overlap_grad_ring_attention : 0.000006s : 0.00% optimize.overlap_grad_flash_sp : 0.000033s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000014s : 0.00% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000023s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000027s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000014s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000022s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% detach_backward : 0.000004s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000031s : 0.00% get_jit_bprop_graph : 0.000002s : 0.00% rewriter_after_jit_bprop_graph : 0.000006s : 0.00% opt_after_jit_grad : 0.000650s : 0.06% validate : 0.000082s : 0.01% Time group info: ------[substitution.] 0.000505 111 9.42% : 0.000048s : 6: substitution.arithmetic_simplify 1.07% : 0.000005s : 6: substitution.elim_not_effective 2.52% : 0.000013s : 6: substitution.float_tuple_getitem_switch 0.67% : 0.000003s : 6: substitution.fold_const_symbol 2.02% : 0.000010s : 8: substitution.graph_param_transform 45.15% : 0.000228s : 5: substitution.inline 1.89% : 0.000010s : 12: substitution.j_node_and_user_rematch 2.14% : 0.000011s : 4: substitution.minmaximum_grad 2.26% : 0.000011s : 12: substitution.remove_not_recompute_node 1.37% : 0.000007s : 2: substitution.replace_old_param 7.80% : 0.000039s : 8: substitution.tuple_list_convert_item_index_to_positive 3.13% : 0.000016s : 8: substitution.tuple_list_get_item_const_eliminator 4.71% : 0.000024s : 8: substitution.tuple_list_get_item_depend_reorder 11.18% : 0.000057s : 12: substitution.tuple_list_get_item_eliminator 4.65% : 0.000024s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 1.053813 2 99.64% : 1.049983s : 1: type_inference.infer 0.36% : 0.003831s : 1: type_inference.specialize ------[replace.] 0.000050 5 100.00% : 0.000050s : 5: replace.inline ------[match.] 0.000224 5 100.00% : 0.000224s : 5: match.inline ------[predicate.] 0.000407 2113 0.92% : 0.000004s : 21: predicate.accumulaten_eliminater 0.96% : 0.000004s : 8: predicate.ad_related_special_op_eliminate 0.65% : 0.000003s : 16: predicate.addn_check_dump 0.94% : 0.000004s : 21: predicate.addn_zero_filter 0.81% : 0.000003s : 21: predicate.adjust_all_reduce_mul_add 2.35% : 0.000010s : 37: predicate.arithmetic_simplify 0.97% : 0.000004s : 21: predicate.cast_eliminate 0.83% : 0.000003s : 16: predicate.check_bprop_eliminate 0.65% : 0.000003s : 16: predicate.compare_switch_simplify 0.20% : 0.000001s : 8: predicate.const_output_eliminate 0.67% : 0.000003s : 16: predicate.depend_value_elim 0.93% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 1.06% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.92% : 0.000004s : 21: predicate.dict_set_item_eliminator 0.93% : 0.000004s : 16: predicate.dumpgradient_eliminate 0.27% : 0.000001s : 8: predicate.elim_not_effective 0.49% : 0.000002s : 8: predicate.elim_shapecalc_of_broadcastargs 1.30% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.14% : 0.000005s : 29: predicate.environ_get_add_eliminate 1.13% : 0.000005s : 29: predicate.environ_get_depend_swap 1.96% : 0.000008s : 45: predicate.environ_get_eliminate 1.24% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.06% : 0.000004s : 26: predicate.exchange_switch_depend_value 1.76% : 0.000007s : 26: predicate.float_depend_g_call 0.67% : 0.000003s : 16: predicate.float_environ_get_switch 1.17% : 0.000005s : 24: predicate.float_tuple_getitem_switch 0.19% : 0.000001s : 8: predicate.fold_const_symbol 0.79% : 0.000003s : 16: predicate.get_grad_eliminate 0.25% : 0.000001s : 8: predicate.graph_param_transform 0.68% : 0.000003s : 16: predicate.incorporate_call 0.61% : 0.000002s : 16: predicate.incorporate_call_switch 5.92% : 0.000024s : 95: predicate.inline 1.00% : 0.000004s : 16: predicate.inline_without_move 0.36% : 0.000001s : 16: predicate.j_node_and_user_rematch 0.91% : 0.000004s : 16: predicate.less_batch_normalization 1.70% : 0.000007s : 37: predicate.list_to_tuple_eliminator_ 2.17% : 0.000009s : 58: predicate.load_eliminater 1.11% : 0.000005s : 8: predicate.loop_unroll_after_grad 1.58% : 0.000006s : 36: predicate.loop_unroll_before_grad 1.79% : 0.000007s : 37: predicate.make_slice_get_slice_eliminator 0.70% : 0.000003s : 16: predicate.merge_addn 0.69% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.68% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.85% : 0.000003s : 21: predicate.minmaximum_grad 0.97% : 0.000004s : 8: predicate.mutable_eliminate 0.40% : 0.000002s : 8: predicate.opt_reshape 0.40% : 0.000002s : 8: predicate.parallel_virtual_node 1.47% : 0.000006s : 26: predicate.partial_defer_inline 1.29% : 0.000005s : 29: predicate.partial_eliminate 0.87% : 0.000004s : 21: predicate.print_const_string_wrapper 0.70% : 0.000003s : 16: predicate.reduce_all_const_elim 1.28% : 0.000005s : 21: predicate.reduce_eliminate 2.34% : 0.000010s : 58: predicate.redundant_stop_gradient_eliminater 0.38% : 0.000002s : 16: predicate.remove_not_recompute_node 1.30% : 0.000005s : 37: predicate.replace_applicator 0.51% : 0.000002s : 16: predicate.replace_old_param 0.21% : 0.000001s : 8: predicate.reset_defer_inline 0.91% : 0.000004s : 21: predicate.reshape_eliminate 0.71% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.52% : 0.000002s : 8: predicate.row_tensor_eliminate 1.17% : 0.000005s : 16: predicate.same_eliminate 0.46% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.85% : 0.000003s : 16: predicate.shard_identity_eliminate 0.82% : 0.000003s : 16: predicate.special_op_eliminate 0.90% : 0.000004s : 16: predicate.specialize_transform 1.05% : 0.000004s : 16: predicate.split_environ_get_set_with_tuple_value 0.79% : 0.000003s : 16: predicate.stack_unstack_eliminate 0.40% : 0.000002s : 8: predicate.switch_call_monad_eliminater 1.15% : 0.000005s : 26: predicate.switch_defer_inline 1.95% : 0.000008s : 42: predicate.switch_layer_defer_inline 3.93% : 0.000016s : 86: predicate.switch_simplify 0.88% : 0.000004s : 21: predicate.tile_eliminate 0.91% : 0.000004s : 21: predicate.transpose_eliminate 1.81% : 0.000007s : 37: predicate.tuple_list_convert_item_index_to_positive 1.92% : 0.000008s : 37: predicate.tuple_list_get_item_const_eliminator 1.72% : 0.000007s : 37: predicate.tuple_list_get_item_depend_reorder 3.24% : 0.000013s : 53: predicate.tuple_list_get_item_eliminator 1.76% : 0.000007s : 37: predicate.tuple_list_get_set_item_eliminator 2.58% : 0.000011s : 53: predicate.tuple_list_set_item_eliminator 1.59% : 0.000006s : 37: predicate.tuple_to_list_eliminator_ 2.28% : 0.000009s : 58: predicate.updatestate_pure_node_eliminater 3.01% : 0.000012s : 74: predicate.updatestate_useless_node_eliminater 0.38% : 0.000002s : 8: predicate.value_based_eliminate 0.73% : 0.000003s : 16: predicate.virtual_dataset_eliminate 0.77% : 0.000003s : 16: predicate.virtual_output_eliminate 0.30% : 0.000001s : 8: predicate.virtual_view_grad_eliminate 0.43% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.005766 32 69.50% : 0.004008s : 25: func_graph_cloner_run.FuncGraphClonerGraph 30.50% : 0.001759s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.312657 192 0.00% : 0.000004s : 1: ForceFp32Comm 8.37% : 0.109918s : 1: add_attr 8.37% : 0.109895s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000106s : 1: add_recomputation 0.00% : 0.000005s : 1: assign_add_opt 0.01% : 0.000094s : 1: auto_monad 0.00% : 0.000037s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.07% : 0.000874s : 1: bootstrap 0.00% : 0.000044s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000029s : 1: control_data_broadcast_order 0.00% : 0.000019s : 1: convert_after_rewriter 0.00% : 0.000050s : 1: cse_after_recomputation 0.00% : 0.000007s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: detach_backward 0.00% : 0.000021s : 1: environ_conv 0.00% : 0.000030s : 1: event_method 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000011s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000009s : 1: label_micro_interleaved_index 0.05% : 0.000624s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.07% : 0.000864s : 1: mutable_eliminate 0.00% : 0.000011s : 1: offloading_packed_experts 0.00% : 0.000028s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000030s : 1: opt.transform.mutable_eliminate 0.18% : 0.002406s : 78: opt.transform.opt_a 0.00% : 0.000061s : 1: opt.transform.opt_after_cconv 0.00% : 0.000051s : 1: opt.transform.opt_after_jit_grad 0.02% : 0.000300s : 28: opt.transform.opt_b 0.01% : 0.000139s : 2: opt.transform.opt_trans_graph 0.01% : 0.000081s : 4: opt.transform.symbol_engine_opt 0.79% : 0.010331s : 1: opt_a 0.01% : 0.000188s : 1: opt_after_cconv 0.05% : 0.000663s : 1: opt_after_jit_grad 0.04% : 0.000483s : 1: opt_b 1.07% : 0.014019s : 1: optimize 0.00% : 0.000034s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000013s : 1: order_py_execute_after_rewriter 0.00% : 0.000037s : 1: overlap_grad_flash_sp 0.00% : 0.000005s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000011s : 1: overlap_grad_ring_attention 0.00% : 0.000005s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000005s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000012s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000005s : 1: overlap_recompute_comm 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000056s : 1: pre_auto_parallel 0.00% : 0.000048s : 1: py_interpret_to_execute 0.00% : 0.000033s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.01% : 0.000070s : 1: remove_dup_value 0.31% : 0.004018s : 1: renormalize.infer 0.15% : 0.002017s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000009s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000320s : 1: rewriter_after_opt_a 0.01% : 0.000110s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000014s : 1: swap_dp_allreduce_reducescatter 0.01% : 0.000144s : 1: symbol_engine_optimizer 0.01% : 0.000178s : 1: tuple_transform 80.29% : 1.053924s : 1: type_inference . [hook] pytest_runtest_teardown:test_swiglu[False-float16--1-shape1] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[False-float16--1-shape1],max_mem:100.0M [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:37:59.395.3 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 1.39641, [21] [bootstrap]: 0.00173779 [type_inference]: 1.3155 [event_method]: 2.478e-05 [auto_monad]: 9.13e-05 [graph_reusing]: 6.71e-06 [inline]: 2.71999e-06 [add_attr]: 0.00679903, [1] [add_attr_with_inline]: 0.00678048, [1] [Cycle 1]: 8.687e-05, [2] [tag_attr]: 2.502e-05 [meta_addattr_fg_expand]: 6.34999e-06 [parallel-infer-symbol]: 4.70001e-06 [pre_auto_parallel]: 4.786e-05 [insert-virtual-dataset]: 2.73998e-06 [parallel-infer-symbol-second]: 9.00007e-07 [dataset_repeat_opt]: 1.86e-06 [pipeline_split]: 1.96e-06 [optimize]: 0.0711585, [53] [py_interpret_to_execute]: 4.099e-05 [rewriter_before_opt_a]: 0.00014773 [opt_a]: 0.0672165, [2] [Cycle 1]: 0.0650459, [45] [expand_dump_flag]: 3.51999e-06 [switch_simplify]: 4.554e-05 [loop_unroll]: 3.004e-05 [a_1]: 0.00112607 [with_stream_mark]: 3.238e-05 [recompute_prepare]: 2.478e-05 [updatestate_depend_eliminate]: 9.99001e-06 [updatestate_assign_eliminate]: 6.61999e-06 [updatestate_loads_eliminate]: 5.87999e-06 [parameter_eliminate]: 2.36e-06 [a_2]: 0.00022353 [accelerated_algorithm]: 1.804e-05 [shard]: 2.98e-06 [meta_shard_fg_expand]: 3.71999e-06 [shard_inline]: 1.272e-05 [merge_send_recv]: 1.536e-05 [auto_parallel]: 1.425e-05 [parallel]: 3.709e-05 [flash_sp]: 1.332e-05 [merge_comm]: 7.23e-06 [allreduce_fusion]: 6.96001e-06 [matmul_add_comm_reduction]: 1.661e-05 [allreduce_slice_to_reducescatter]: 6.69999e-07 [virtual_shard_identity]: 1.803e-05 [virtual_dataset]: 1.349e-05 [get_grad_eliminate_]: 1.391e-05 [virtual_output]: 1.315e-05 [merge_forward]: 8.17e-06 [cell_reuse_recompute_pass]: 1.77001e-06 [offload_activation]: 1.793e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.957e-05 [merge_recompute_call_nodes]: 1.66002e-06 [before_grad]: 2.474e-05 [set_forward_comm_id_for_comm_node_pass]: 8.70999e-06 [meta_fg_expand]: 6.42001e-06 [flash_sp_send_recv_attached]: 2.93e-06 [receive_attached]: 2.72001e-06 [after_resolve]: 1.868e-05 [a_after_grad]: 2.365e-05 [renormalize]: 0.06254 [add_forward_monad_depend]: 1.415e-05 [auto_monad_grad]: 3.33998e-06 [auto_monad_eliminator]: 3.604e-05 [cse]: 0.00012294 [a_3]: 0.00011442 [Cycle 2]: 0.00215287, [45] [expand_dump_flag]: 2.54999e-06 [switch_simplify]: 1.72e-05 [loop_unroll]: 1.343e-05 [a_1]: 0.00038724 [with_stream_mark]: 3.108e-05 [recompute_prepare]: 1.783e-05 [updatestate_depend_eliminate]: 1.011e-05 [updatestate_assign_eliminate]: 6.02999e-06 [updatestate_loads_eliminate]: 5.38002e-06 [parameter_eliminate]: 2.76e-06 [a_2]: 0.00019003 [accelerated_algorithm]: 1.509e-05 [shard]: 3.83001e-06 [meta_shard_fg_expand]: 4.08999e-06 [shard_inline]: 1.29e-05 [merge_send_recv]: 1.445e-05 [auto_parallel]: 1.575e-05 [parallel]: 1.113e-05 [flash_sp]: 4.64998e-06 [merge_comm]: 7.15e-06 [allreduce_fusion]: 8.06001e-06 [matmul_add_comm_reduction]: 1.64e-05 [allreduce_slice_to_reducescatter]: 9.5999e-07 [virtual_shard_identity]: 1.918e-05 [virtual_dataset]: 1.286e-05 [get_grad_eliminate_]: 1.413e-05 [virtual_output]: 1.252e-05 [merge_forward]: 8.80999e-06 [cell_reuse_recompute_pass]: 3.36001e-06 [offload_activation]: 1.812e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.845e-05 [merge_recompute_call_nodes]: 1.86998e-06 [before_grad]: 2.294e-05 [set_forward_comm_id_for_comm_node_pass]: 8.78001e-06 [meta_fg_expand]: 5.81e-06 [flash_sp_send_recv_attached]: 2.30002e-06 [receive_attached]: 2.62001e-06 [after_resolve]: 1.814e-05 [a_after_grad]: 1.945e-05 [renormalize]: 6.00121e-08 [add_forward_monad_depend]: 5.44e-06 [auto_monad_grad]: 3.26999e-06 [auto_monad_eliminator]: 2.2e-05 [cse]: 0.00072064 [a_3]: 0.00010261 [py_interpret_to_execute_after_opt_a]: 3.222e-05 [slice_cell_reuse_recomputed_activation]: 2.76999e-06 [rewriter_after_opt_a]: 0.0003319 [convert_after_rewriter]: 1.544e-05 [order_py_execute_after_rewriter]: 9.45001e-06 [mutable_eliminate]: 0.00090346 [opt_b]: 0.00048372, [1] [Cycle 1]: 0.00047453, [7] [b_1]: 0.0003222 [b_2]: 1.545e-05 [updatestate_depend_eliminate]: 1.274e-05 [updatestate_assign_eliminate]: 6.32001e-06 [updatestate_loads_eliminate]: 5.76e-06 [renormalize]: 8.89995e-07 [cse]: 6.673e-05 [optimize_parallel_all_gather_comm]: 3.619e-05 [overlap_param_gather]: 2.96001e-06 [cconv]: 3.876e-05 [loop_unroll]: 0.00052253 [opt_after_cconv]: 0.00017102, [1] [Cycle 1]: 0.0001641, [7] [c_1]: 6.1e-05 [parameter_eliminate]: 5.17e-06 [updatestate_depend_eliminate]: 1.04e-05 [updatestate_assign_eliminate]: 5.22e-06 [updatestate_loads_eliminate]: 4.75001e-06 [cse]: 3.972e-05 [renormalize]: 4.30009e-07 [remove_dup_value]: 6.491e-05 [tuple_transform]: 0.0001738, [1] [Cycle 1]: 0.00016866, [4] [d_1]: 0.00012797 [none_parameter_eliminate]: 1.91e-06 [renormalize]: 2.09984e-07 [switch_simplify]: 1.399e-05 [partial_unused_args_eliminate]: 2.41e-06 [add_recomputation]: 9.666e-05 [cse_after_recomputation]: 4.611e-05, [1] [Cycle 1]: 3.998e-05, [1] [cse]: 3.199e-05 [environ_conv]: 1.752e-05 [swap_dp_allreduce_reducescatter]: 1.127e-05 [bias_add_comm_swap]: 3.27002e-06 [label_micro_interleaved_index]: 6.26e-06 [label_fine_grained_interleaved_index]: 3.09001e-06 [merge_cast_opt]: 1.44998e-06 [slice_recompute_activation]: 2.61e-06 [micro_interleaved_order_control]: 2.46e-06 [assign_add_opt]: 1.24998e-06 [ForceFp32Comm]: 1.23002e-06 [remove_cast_before_assign_add]: 1.39e-06 [full_micro_interleaved_order_control]: 2.38998e-06 [reorder_send_recv_between_fp_bp]: 3.60998e-06 [comm_op_add_attrs]: 1.30001e-06 [add_comm_op_reuse_tag]: 1.15001e-06 [interleave_split_concat_branches]: 1.52001e-06 [interleave_parallel_branches]: 1.15999e-06 [overlap_opt_shard_in_pipeline]: 2.97002e-06 [overlap_opt_shard_grad_in_pipeline]: 1.91e-06 [control_data_broadcast_order]: 2.329e-05 [grouped_pairwise_exchange_alltoall]: 1.69e-06 [offloading_packed_experts]: 6.46999e-06 [overlap_recompute_and_grad_model_parallel]: 7.77e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.34e-06 [overlap_recompute_allgather_and_fa_grad]: 1.50001e-06 [overlap_recompute_comm]: 2.72001e-06 [overlap_grad_ring_attention]: 6.21998e-06 [overlap_grad_flash_sp]: 3.399e-05 [begin_end_overlap_inline]: 5.59987e-07 [split_matmul_comm_elemetwise]: 2.66999e-06 [split_layernorm_comm]: 2.17001e-06 [handle_group_info]: 1.20999e-06 [symbol_engine_optimizer]: 0.00036711, [1] [Cycle 1]: 0.00036154, [6] [build]: 0.00019559 [elim_shapecalc]: 2.113e-05 [elim_not_effective]: 4.765e-05 [opt_reshape]: 1.536e-05 [fold_const_symbol]: 3.784e-05 [renormalize]: 3.00002e-07 [detach_backward]: 2.59001e-06 [pipeline_parallel_scheduler]: 1.73997e-06 [auto_monad_reorder]: 3.032e-05 [get_jit_bprop_graph]: 3.48999e-06 [rewriter_after_jit_bprop_graph]: 6.68e-06 [opt_after_jit_grad]: 0.00064401 [validate]: 7.21e-05 Sums bootstrap : 0.001738s : 0.13% type_inference : 1.315500s : 94.76% event_method : 0.000025s : 0.00% auto_monad : 0.000091s : 0.01% graph_reusing : 0.000007s : 0.00% inline : 0.000003s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000025s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000006s : 0.00% parallel-infer-symbol : 0.000005s : 0.00% pre_auto_parallel : 0.000048s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000041s : 0.00% optimize.rewriter_before_opt_a : 0.000148s : 0.01% optimize.opt_a.expand_dump_flag : 0.000006s : 0.00% optimize.opt_a.switch_simplify : 0.000063s : 0.00% optimize.opt_a.loop_unroll : 0.000043s : 0.00% optimize.opt_a.a_1 : 0.001513s : 0.11% optimize.opt_a.with_stream_mark : 0.000063s : 0.00% optimize.opt_a.recompute_prepare : 0.000043s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000020s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000414s : 0.03% optimize.opt_a.accelerated_algorithm : 0.000033s : 0.00% optimize.opt_a.shard : 0.000007s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.00% optimize.opt_a.shard_inline : 0.000026s : 0.00% optimize.opt_a.merge_send_recv : 0.000030s : 0.00% optimize.opt_a.auto_parallel : 0.000030s : 0.00% optimize.opt_a.parallel : 0.000048s : 0.00% optimize.opt_a.flash_sp : 0.000018s : 0.00% optimize.opt_a.merge_comm : 0.000014s : 0.00% optimize.opt_a.allreduce_fusion : 0.000015s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000033s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000037s : 0.00% optimize.opt_a.virtual_dataset : 0.000026s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000028s : 0.00% optimize.opt_a.virtual_output : 0.000026s : 0.00% optimize.opt_a.merge_forward : 0.000017s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.offload_activation : 0.000036s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000058s : 0.00% optimize.opt_a.merge_recompute_call_nodes : 0.000004s : 0.00% optimize.opt_a.before_grad : 0.000048s : 0.00% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000017s : 0.00% optimize.opt_a.meta_fg_expand : 0.000012s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000037s : 0.00% optimize.opt_a.a_after_grad : 0.000043s : 0.00% optimize.opt_a.renormalize : 0.062540s : 4.51% optimize.opt_a.add_forward_monad_depend : 0.000020s : 0.00% optimize.opt_a.auto_monad_grad : 0.000007s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000058s : 0.00% optimize.opt_a.cse : 0.000844s : 0.06% optimize.opt_a.a_3 : 0.000217s : 0.02% optimize.py_interpret_to_execute_after_opt_a : 0.000032s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000332s : 0.02% optimize.convert_after_rewriter : 0.000015s : 0.00% optimize.order_py_execute_after_rewriter : 0.000009s : 0.00% optimize.mutable_eliminate : 0.000903s : 0.07% optimize.opt_b.b_1 : 0.000322s : 0.02% optimize.opt_b.b_2 : 0.000015s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000013s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000067s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000036s : 0.00% optimize.overlap_param_gather : 0.000003s : 0.00% optimize.cconv : 0.000039s : 0.00% optimize.loop_unroll : 0.000523s : 0.04% optimize.opt_after_cconv.c_1 : 0.000061s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000010s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000040s : 0.00% optimize.opt_after_cconv.renormalize : 0.000000s : 0.00% optimize.remove_dup_value : 0.000065s : 0.00% optimize.tuple_transform.d_1 : 0.000128s : 0.01% optimize.tuple_transform.none_parameter_eliminate : 0.000002s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000014s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_recomputation : 0.000097s : 0.01% optimize.cse_after_recomputation.cse : 0.000032s : 0.00% optimize.environ_conv : 0.000018s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000011s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000006s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000003s : 0.00% optimize.micro_interleaved_order_control : 0.000002s : 0.00% optimize.assign_add_opt : 0.000001s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000004s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000003s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000023s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000006s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000008s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000003s : 0.00% optimize.overlap_grad_ring_attention : 0.000006s : 0.00% optimize.overlap_grad_flash_sp : 0.000034s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000196s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000021s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000048s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000015s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000038s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% detach_backward : 0.000003s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.00% get_jit_bprop_graph : 0.000003s : 0.00% rewriter_after_jit_bprop_graph : 0.000007s : 0.00% opt_after_jit_grad : 0.000644s : 0.05% validate : 0.000072s : 0.01% Time group info: ------[substitution.] 0.000561 111 9.46% : 0.000053s : 6: substitution.arithmetic_simplify 4.48% : 0.000025s : 6: substitution.elim_not_effective 2.36% : 0.000013s : 6: substitution.float_tuple_getitem_switch 3.43% : 0.000019s : 6: substitution.fold_const_symbol 1.86% : 0.000010s : 8: substitution.graph_param_transform 42.25% : 0.000237s : 5: substitution.inline 1.91% : 0.000011s : 12: substitution.j_node_and_user_rematch 2.13% : 0.000012s : 4: substitution.minmaximum_grad 1.99% : 0.000011s : 12: substitution.remove_not_recompute_node 1.20% : 0.000007s : 2: substitution.replace_old_param 6.88% : 0.000039s : 8: substitution.tuple_list_convert_item_index_to_positive 2.97% : 0.000017s : 8: substitution.tuple_list_get_item_const_eliminator 3.99% : 0.000022s : 8: substitution.tuple_list_get_item_depend_reorder 10.94% : 0.000061s : 12: substitution.tuple_list_get_item_eliminator 4.14% : 0.000023s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 1.315386 2 99.69% : 1.311305s : 1: type_inference.infer 0.31% : 0.004081s : 1: type_inference.specialize ------[replace.] 0.000056 5 100.00% : 0.000056s : 5: replace.inline ------[match.] 0.000232 5 100.00% : 0.000232s : 5: match.inline ------[predicate.] 0.000408 2113 0.92% : 0.000004s : 21: predicate.accumulaten_eliminater 0.82% : 0.000003s : 8: predicate.ad_related_special_op_eliminate 0.64% : 0.000003s : 16: predicate.addn_check_dump 1.01% : 0.000004s : 21: predicate.addn_zero_filter 0.82% : 0.000003s : 21: predicate.adjust_all_reduce_mul_add 2.31% : 0.000009s : 37: predicate.arithmetic_simplify 0.92% : 0.000004s : 21: predicate.cast_eliminate 0.73% : 0.000003s : 16: predicate.check_bprop_eliminate 0.66% : 0.000003s : 16: predicate.compare_switch_simplify 0.20% : 0.000001s : 8: predicate.const_output_eliminate 0.69% : 0.000003s : 16: predicate.depend_value_elim 0.90% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 1.10% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.90% : 0.000004s : 21: predicate.dict_set_item_eliminator 0.95% : 0.000004s : 16: predicate.dumpgradient_eliminate 0.22% : 0.000001s : 8: predicate.elim_not_effective 0.47% : 0.000002s : 8: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.21% : 0.000005s : 29: predicate.environ_get_add_eliminate 1.13% : 0.000005s : 29: predicate.environ_get_depend_swap 1.92% : 0.000008s : 45: predicate.environ_get_eliminate 1.16% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.06% : 0.000004s : 26: predicate.exchange_switch_depend_value 1.77% : 0.000007s : 26: predicate.float_depend_g_call 0.63% : 0.000003s : 16: predicate.float_environ_get_switch 1.04% : 0.000004s : 24: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 8: predicate.fold_const_symbol 0.75% : 0.000003s : 16: predicate.get_grad_eliminate 0.24% : 0.000001s : 8: predicate.graph_param_transform 0.69% : 0.000003s : 16: predicate.incorporate_call 0.62% : 0.000003s : 16: predicate.incorporate_call_switch 5.97% : 0.000024s : 95: predicate.inline 1.09% : 0.000004s : 16: predicate.inline_without_move 0.36% : 0.000001s : 16: predicate.j_node_and_user_rematch 1.28% : 0.000005s : 16: predicate.less_batch_normalization 1.93% : 0.000008s : 37: predicate.list_to_tuple_eliminator_ 2.20% : 0.000009s : 58: predicate.load_eliminater 0.79% : 0.000003s : 8: predicate.loop_unroll_after_grad 1.50% : 0.000006s : 36: predicate.loop_unroll_before_grad 1.71% : 0.000007s : 37: predicate.make_slice_get_slice_eliminator 0.67% : 0.000003s : 16: predicate.merge_addn 0.67% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.72% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.90% : 0.000004s : 21: predicate.minmaximum_grad 0.88% : 0.000004s : 8: predicate.mutable_eliminate 0.45% : 0.000002s : 8: predicate.opt_reshape 0.55% : 0.000002s : 8: predicate.parallel_virtual_node 1.50% : 0.000006s : 26: predicate.partial_defer_inline 1.26% : 0.000005s : 29: predicate.partial_eliminate 0.86% : 0.000004s : 21: predicate.print_const_string_wrapper 0.86% : 0.000004s : 16: predicate.reduce_all_const_elim 1.20% : 0.000005s : 21: predicate.reduce_eliminate 2.26% : 0.000009s : 58: predicate.redundant_stop_gradient_eliminater 0.42% : 0.000002s : 16: predicate.remove_not_recompute_node 1.33% : 0.000005s : 37: predicate.replace_applicator 0.48% : 0.000002s : 16: predicate.replace_old_param 0.23% : 0.000001s : 8: predicate.reset_defer_inline 0.91% : 0.000004s : 21: predicate.reshape_eliminate 0.76% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.52% : 0.000002s : 8: predicate.row_tensor_eliminate 1.33% : 0.000005s : 16: predicate.same_eliminate 0.53% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.88% : 0.000004s : 16: predicate.shard_identity_eliminate 0.75% : 0.000003s : 16: predicate.special_op_eliminate 0.82% : 0.000003s : 16: predicate.specialize_transform 1.15% : 0.000005s : 16: predicate.split_environ_get_set_with_tuple_value 0.77% : 0.000003s : 16: predicate.stack_unstack_eliminate 0.46% : 0.000002s : 8: predicate.switch_call_monad_eliminater 1.15% : 0.000005s : 26: predicate.switch_defer_inline 1.77% : 0.000007s : 42: predicate.switch_layer_defer_inline 3.82% : 0.000016s : 86: predicate.switch_simplify 0.90% : 0.000004s : 21: predicate.tile_eliminate 1.01% : 0.000004s : 21: predicate.transpose_eliminate 1.77% : 0.000007s : 37: predicate.tuple_list_convert_item_index_to_positive 2.00% : 0.000008s : 37: predicate.tuple_list_get_item_const_eliminator 1.69% : 0.000007s : 37: predicate.tuple_list_get_item_depend_reorder 3.16% : 0.000013s : 53: predicate.tuple_list_get_item_eliminator 1.70% : 0.000007s : 37: predicate.tuple_list_get_set_item_eliminator 2.68% : 0.000011s : 53: predicate.tuple_list_set_item_eliminator 1.66% : 0.000007s : 37: predicate.tuple_to_list_eliminator_ 2.25% : 0.000009s : 58: predicate.updatestate_pure_node_eliminater 2.99% : 0.000012s : 74: predicate.updatestate_useless_node_eliminater 0.38% : 0.000002s : 8: predicate.value_based_eliminate 0.79% : 0.000003s : 16: predicate.virtual_dataset_eliminate 0.71% : 0.000003s : 16: predicate.virtual_output_eliminate 0.31% : 0.000001s : 8: predicate.virtual_view_grad_eliminate 0.42% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.005787 32 68.36% : 0.003956s : 25: func_graph_cloner_run.FuncGraphClonerGraph 31.64% : 0.001831s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.539929 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.44% : 0.006806s : 1: add_attr 0.44% : 0.006786s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000102s : 1: add_recomputation 0.00% : 0.000005s : 1: assign_add_opt 0.01% : 0.000098s : 1: auto_monad 0.00% : 0.000036s : 1: auto_monad_reorder 0.00% : 0.000004s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.12% : 0.001840s : 1: bootstrap 0.00% : 0.000043s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000027s : 1: control_data_broadcast_order 0.00% : 0.000021s : 1: convert_after_rewriter 0.00% : 0.000049s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000011s : 1: detach_backward 0.00% : 0.000021s : 1: environ_conv 0.00% : 0.000033s : 1: event_method 0.00% : 0.000007s : 1: full_micro_interleaved_order_control 0.00% : 0.000008s : 1: get_jit_bprop_graph 0.00% : 0.000011s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000009s : 1: label_micro_interleaved_index 0.03% : 0.000534s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.06% : 0.000917s : 1: mutable_eliminate 0.00% : 0.000010s : 1: offloading_packed_experts 0.00% : 0.000023s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000031s : 1: opt.transform.mutable_eliminate 0.16% : 0.002516s : 78: opt.transform.opt_a 0.00% : 0.000060s : 1: opt.transform.opt_after_cconv 0.00% : 0.000048s : 1: opt.transform.opt_after_jit_grad 0.02% : 0.000301s : 28: opt.transform.opt_b 0.01% : 0.000139s : 2: opt.transform.opt_trans_graph 0.01% : 0.000117s : 4: opt.transform.symbol_engine_opt 4.37% : 0.067221s : 1: opt_a 0.01% : 0.000175s : 1: opt_after_cconv 0.04% : 0.000658s : 1: opt_after_jit_grad 0.03% : 0.000488s : 1: opt_b 4.62% : 0.071166s : 1: optimize 0.00% : 0.000040s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000014s : 1: order_py_execute_after_rewriter 0.00% : 0.000038s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000010s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000006s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000011s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: overlap_recompute_comm 0.00% : 0.000011s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000053s : 1: pre_auto_parallel 0.00% : 0.000046s : 1: py_interpret_to_execute 0.00% : 0.000038s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000069s : 1: remove_dup_value 3.92% : 0.060432s : 1: renormalize.infer 0.14% : 0.002091s : 1: renormalize.specialize 0.00% : 0.000007s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000010s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000344s : 1: rewriter_after_opt_a 0.01% : 0.000154s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000015s : 1: swap_dp_allreduce_reducescatter 0.02% : 0.000370s : 1: symbol_engine_optimizer 0.01% : 0.000177s : 1: tuple_transform 85.43% : 1.315531s : 1: type_inference . [hook] pytest_runtest_teardown:test_swiglu[True-float32--1-shape0] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[True-float32--1-shape0],max_mem:100.0M [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:38:03.225.652 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 1.47237, [21] [bootstrap]: 0.00078196 [type_inference]: 1.40903 [event_method]: 2.676e-05 [auto_monad]: 0.00010022 [graph_reusing]: 7.25e-06 [inline]: 6.63e-06 [add_attr]: 0.0467716, [1] [add_attr_with_inline]: 0.0467546, [1] [Cycle 1]: 7.879e-05, [2] [tag_attr]: 2.427e-05 [meta_addattr_fg_expand]: 6.07001e-06 [parallel-infer-symbol]: 4.18999e-06 [pre_auto_parallel]: 4.595e-05 [insert-virtual-dataset]: 2.76999e-06 [parallel-infer-symbol-second]: 7.00005e-07 [dataset_repeat_opt]: 1.96e-06 [pipeline_split]: 1.82999e-06 [optimize]: 0.0145486, [53] [py_interpret_to_execute]: 3.506e-05 [rewriter_before_opt_a]: 9.734e-05 [opt_a]: 0.0108661, [2] [Cycle 1]: 0.0089219, [45] [expand_dump_flag]: 3.45e-06 [switch_simplify]: 4.575e-05 [loop_unroll]: 3.01e-05 [a_1]: 0.00107211 [with_stream_mark]: 2.532e-05 [recompute_prepare]: 1.79e-05 [updatestate_depend_eliminate]: 7.68999e-06 [updatestate_assign_eliminate]: 6.74001e-06 [updatestate_loads_eliminate]: 5.75001e-06 [parameter_eliminate]: 1.93002e-06 [a_2]: 0.00024379 [accelerated_algorithm]: 1.5e-05 [shard]: 2.81e-06 [meta_shard_fg_expand]: 3.36999e-06 [shard_inline]: 1.263e-05 [merge_send_recv]: 1.32e-05 [auto_parallel]: 1.054e-05 [parallel]: 3.318e-05 [flash_sp]: 1.094e-05 [merge_comm]: 7.17002e-06 [allreduce_fusion]: 7.31999e-06 [matmul_add_comm_reduction]: 0.00012846 [allreduce_slice_to_reducescatter]: 1.12999e-06 [virtual_shard_identity]: 1.934e-05 [virtual_dataset]: 1.455e-05 [get_grad_eliminate_]: 1.341e-05 [virtual_output]: 1.298e-05 [merge_forward]: 9.25999e-06 [cell_reuse_recompute_pass]: 1.52001e-06 [offload_activation]: 1.866e-05 [cell_reuse_handle_not_recompute_node_pass]: 3.06e-05 [merge_recompute_call_nodes]: 1.67001e-06 [before_grad]: 2.352e-05 [set_forward_comm_id_for_comm_node_pass]: 9.22999e-06 [meta_fg_expand]: 5.84e-06 [flash_sp_send_recv_attached]: 3.46001e-06 [receive_attached]: 2.64001e-06 [after_resolve]: 1.93e-05 [a_after_grad]: 2.217e-05 [renormalize]: 0.00640662 [add_forward_monad_depend]: 3.899e-05 [auto_monad_grad]: 3.01001e-06 [auto_monad_eliminator]: 3.602e-05 [cse]: 7.311e-05 [a_3]: 0.00011273 [Cycle 2]: 0.00192841, [45] [expand_dump_flag]: 3.05998e-06 [switch_simplify]: 1.717e-05 [loop_unroll]: 1.308e-05 [a_1]: 0.00038296 [with_stream_mark]: 2.538e-05 [recompute_prepare]: 1.232e-05 [updatestate_depend_eliminate]: 8.80001e-06 [updatestate_assign_eliminate]: 6.04001e-06 [updatestate_loads_eliminate]: 5.84999e-06 [parameter_eliminate]: 1.99999e-06 [a_2]: 0.00025616 [accelerated_algorithm]: 1.33e-05 [shard]: 2.88e-06 [meta_shard_fg_expand]: 3.26001e-06 [shard_inline]: 1.29e-05 [merge_send_recv]: 1.389e-05 [auto_parallel]: 1.424e-05 [parallel]: 1.178e-05 [flash_sp]: 4.23001e-06 [merge_comm]: 7e-06 [allreduce_fusion]: 6.46e-06 [matmul_add_comm_reduction]: 1.503e-05 [allreduce_slice_to_reducescatter]: 8.70001e-07 [virtual_shard_identity]: 1.374e-05 [virtual_dataset]: 1.251e-05 [get_grad_eliminate_]: 1.341e-05 [virtual_output]: 1.166e-05 [merge_forward]: 7.56999e-06 [cell_reuse_recompute_pass]: 3.09999e-06 [offload_activation]: 1.73e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.569e-05 [merge_recompute_call_nodes]: 1.61998e-06 [before_grad]: 2.215e-05 [set_forward_comm_id_for_comm_node_pass]: 7.34002e-06 [meta_fg_expand]: 5.56e-06 [flash_sp_send_recv_attached]: 1.95001e-06 [receive_attached]: 2.80002e-06 [after_resolve]: 1.741e-05 [a_after_grad]: 1.846e-05 [renormalize]: 6.99947e-08 [add_forward_monad_depend]: 3.9e-06 [auto_monad_grad]: 1.84e-06 [auto_monad_eliminator]: 1.592e-05 [cse]: 0.00053762 [a_3]: 9.331e-05 [py_interpret_to_execute_after_opt_a]: 2.979e-05 [slice_cell_reuse_recomputed_activation]: 2.74999e-06 [rewriter_after_opt_a]: 0.00029531 [convert_after_rewriter]: 1.426e-05 [order_py_execute_after_rewriter]: 9.15001e-06 [mutable_eliminate]: 0.0007788 [opt_b]: 0.00046317, [1] [Cycle 1]: 0.00045507, [7] [b_1]: 0.00032167 [b_2]: 1.437e-05 [updatestate_depend_eliminate]: 1.024e-05 [updatestate_assign_eliminate]: 5.64998e-06 [updatestate_loads_eliminate]: 5.64998e-06 [renormalize]: 1.10001e-06 [cse]: 5.336e-05 [optimize_parallel_all_gather_comm]: 2.703e-05 [overlap_param_gather]: 2.36998e-06 [cconv]: 3.866e-05 [loop_unroll]: 0.00051129 [opt_after_cconv]: 0.00017365, [1] [Cycle 1]: 0.00016581, [7] [c_1]: 6.26e-05 [parameter_eliminate]: 4.12003e-06 [updatestate_depend_eliminate]: 1.103e-05 [updatestate_assign_eliminate]: 5.22999e-06 [updatestate_loads_eliminate]: 5.10001e-06 [cse]: 3.978e-05 [renormalize]: 6.09987e-07 [remove_dup_value]: 6.233e-05 [tuple_transform]: 0.00022736, [1] [Cycle 1]: 0.00022218, [4] [d_1]: 0.00017964 [none_parameter_eliminate]: 2.28002e-06 [renormalize]: 1.8999e-07 [switch_simplify]: 1.393e-05 [partial_unused_args_eliminate]: 2.14e-06 [add_recomputation]: 0.00010665 [cse_after_recomputation]: 4.426e-05, [1] [Cycle 1]: 3.812e-05, [1] [cse]: 3.149e-05 [environ_conv]: 1.839e-05 [swap_dp_allreduce_reducescatter]: 9.62001e-06 [bias_add_comm_swap]: 3.16001e-06 [label_micro_interleaved_index]: 4.90001e-06 [label_fine_grained_interleaved_index]: 2.84999e-06 [merge_cast_opt]: 1.34998e-06 [slice_recompute_activation]: 2.26e-06 [micro_interleaved_order_control]: 2.50002e-06 [assign_add_opt]: 1.29e-06 [ForceFp32Comm]: 8.70001e-07 [remove_cast_before_assign_add]: 1.24e-06 [full_micro_interleaved_order_control]: 2.69999e-06 [reorder_send_recv_between_fp_bp]: 2.84001e-06 [comm_op_add_attrs]: 1.14e-06 [add_comm_op_reuse_tag]: 1.39e-06 [interleave_split_concat_branches]: 1.61998e-06 [interleave_parallel_branches]: 1.37e-06 [overlap_opt_shard_in_pipeline]: 1.30999e-06 [overlap_opt_shard_grad_in_pipeline]: 2.51998e-06 [control_data_broadcast_order]: 2.369e-05 [grouped_pairwise_exchange_alltoall]: 1.91e-06 [offloading_packed_experts]: 7.65e-06 [overlap_recompute_and_grad_model_parallel]: 8.1e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.64e-06 [overlap_recompute_allgather_and_fa_grad]: 1.62001e-06 [overlap_recompute_comm]: 2.44999e-06 [overlap_grad_ring_attention]: 6.84999e-06 [overlap_grad_flash_sp]: 3.345e-05 [begin_end_overlap_inline]: 6.00005e-07 [split_matmul_comm_elemetwise]: 2.24999e-06 [split_layernorm_comm]: 1.76e-06 [handle_group_info]: 9.5999e-07 [symbol_engine_optimizer]: 0.00033339, [1] [Cycle 1]: 0.00032809, [6] [build]: 0.00017673 [elim_shapecalc]: 2.075e-05 [elim_not_effective]: 3.65e-05 [opt_reshape]: 1.46e-05 [fold_const_symbol]: 3.74e-05 [renormalize]: 3.50003e-07 [detach_backward]: 2.91e-06 [pipeline_parallel_scheduler]: 1.94999e-06 [auto_monad_reorder]: 2.987e-05 [get_jit_bprop_graph]: 3.48999e-06 [rewriter_after_jit_bprop_graph]: 5.40001e-06 [opt_after_jit_grad]: 0.00062977 [validate]: 0.0001228 Sums bootstrap : 0.000782s : 0.05% type_inference : 1.409030s : 98.93% event_method : 0.000027s : 0.00% auto_monad : 0.000100s : 0.01% graph_reusing : 0.000007s : 0.00% inline : 0.000007s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000024s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000006s : 0.00% parallel-infer-symbol : 0.000004s : 0.00% pre_auto_parallel : 0.000046s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000035s : 0.00% optimize.rewriter_before_opt_a : 0.000097s : 0.01% optimize.opt_a.expand_dump_flag : 0.000007s : 0.00% optimize.opt_a.switch_simplify : 0.000063s : 0.00% optimize.opt_a.loop_unroll : 0.000043s : 0.00% optimize.opt_a.a_1 : 0.001455s : 0.10% optimize.opt_a.with_stream_mark : 0.000051s : 0.00% optimize.opt_a.recompute_prepare : 0.000030s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000016s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.00% optimize.opt_a.parameter_eliminate : 0.000004s : 0.00% optimize.opt_a.a_2 : 0.000500s : 0.04% optimize.opt_a.accelerated_algorithm : 0.000028s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000007s : 0.00% optimize.opt_a.shard_inline : 0.000026s : 0.00% optimize.opt_a.merge_send_recv : 0.000027s : 0.00% optimize.opt_a.auto_parallel : 0.000025s : 0.00% optimize.opt_a.parallel : 0.000045s : 0.00% optimize.opt_a.flash_sp : 0.000015s : 0.00% optimize.opt_a.merge_comm : 0.000014s : 0.00% optimize.opt_a.allreduce_fusion : 0.000014s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000143s : 0.01% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000033s : 0.00% optimize.opt_a.virtual_dataset : 0.000027s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000027s : 0.00% optimize.opt_a.virtual_output : 0.000025s : 0.00% optimize.opt_a.merge_forward : 0.000017s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.offload_activation : 0.000036s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000056s : 0.00% optimize.opt_a.merge_recompute_call_nodes : 0.000003s : 0.00% optimize.opt_a.before_grad : 0.000046s : 0.00% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000017s : 0.00% optimize.opt_a.meta_fg_expand : 0.000011s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000037s : 0.00% optimize.opt_a.a_after_grad : 0.000041s : 0.00% optimize.opt_a.renormalize : 0.006407s : 0.45% optimize.opt_a.add_forward_monad_depend : 0.000043s : 0.00% optimize.opt_a.auto_monad_grad : 0.000005s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000052s : 0.00% optimize.opt_a.cse : 0.000611s : 0.04% optimize.opt_a.a_3 : 0.000206s : 0.01% optimize.py_interpret_to_execute_after_opt_a : 0.000030s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000003s : 0.00% optimize.rewriter_after_opt_a : 0.000295s : 0.02% optimize.convert_after_rewriter : 0.000014s : 0.00% optimize.order_py_execute_after_rewriter : 0.000009s : 0.00% optimize.mutable_eliminate : 0.000779s : 0.05% optimize.opt_b.b_1 : 0.000322s : 0.02% optimize.opt_b.b_2 : 0.000014s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000010s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000053s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000027s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000039s : 0.00% optimize.loop_unroll : 0.000511s : 0.04% optimize.opt_after_cconv.c_1 : 0.000063s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000004s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000011s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000040s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000062s : 0.00% optimize.tuple_transform.d_1 : 0.000180s : 0.01% optimize.tuple_transform.none_parameter_eliminate : 0.000002s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000014s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_recomputation : 0.000107s : 0.01% optimize.cse_after_recomputation.cse : 0.000031s : 0.00% optimize.environ_conv : 0.000018s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000010s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000005s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000001s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000001s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000003s : 0.00% optimize.control_data_broadcast_order : 0.000024s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000008s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000008s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000002s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000002s : 0.00% optimize.overlap_grad_ring_attention : 0.000007s : 0.00% optimize.overlap_grad_flash_sp : 0.000033s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000177s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000021s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000037s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000015s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000037s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000000s : 0.00% detach_backward : 0.000003s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.00% get_jit_bprop_graph : 0.000003s : 0.00% rewriter_after_jit_bprop_graph : 0.000005s : 0.00% opt_after_jit_grad : 0.000630s : 0.04% validate : 0.000123s : 0.01% Time group info: ------[substitution.] 0.000552 111 8.70% : 0.000048s : 6: substitution.arithmetic_simplify 1.10% : 0.000006s : 6: substitution.elim_not_effective 2.31% : 0.000013s : 6: substitution.float_tuple_getitem_switch 3.28% : 0.000018s : 6: substitution.fold_const_symbol 2.00% : 0.000011s : 8: substitution.graph_param_transform 37.47% : 0.000207s : 5: substitution.inline 1.49% : 0.000008s : 12: substitution.j_node_and_user_rematch 2.07% : 0.000011s : 4: substitution.minmaximum_grad 2.18% : 0.000012s : 12: substitution.remove_not_recompute_node 1.19% : 0.000007s : 2: substitution.replace_old_param 16.62% : 0.000092s : 8: substitution.tuple_list_convert_item_index_to_positive 2.95% : 0.000016s : 8: substitution.tuple_list_get_item_const_eliminator 4.10% : 0.000023s : 8: substitution.tuple_list_get_item_depend_reorder 9.97% : 0.000055s : 12: substitution.tuple_list_get_item_eliminator 4.55% : 0.000025s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 1.408876 2 99.68% : 1.404349s : 1: type_inference.infer 0.32% : 0.004527s : 1: type_inference.specialize ------[replace.] 0.000048 5 100.00% : 0.000048s : 5: replace.inline ------[match.] 0.000202 5 100.00% : 0.000202s : 5: match.inline ------[predicate.] 0.000401 2113 0.93% : 0.000004s : 21: predicate.accumulaten_eliminater 0.91% : 0.000004s : 8: predicate.ad_related_special_op_eliminate 0.68% : 0.000003s : 16: predicate.addn_check_dump 0.93% : 0.000004s : 21: predicate.addn_zero_filter 0.82% : 0.000003s : 21: predicate.adjust_all_reduce_mul_add 2.45% : 0.000010s : 37: predicate.arithmetic_simplify 0.95% : 0.000004s : 21: predicate.cast_eliminate 0.70% : 0.000003s : 16: predicate.check_bprop_eliminate 0.69% : 0.000003s : 16: predicate.compare_switch_simplify 0.21% : 0.000001s : 8: predicate.const_output_eliminate 0.67% : 0.000003s : 16: predicate.depend_value_elim 0.89% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 0.98% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.97% : 0.000004s : 21: predicate.dict_set_item_eliminator 0.92% : 0.000004s : 16: predicate.dumpgradient_eliminate 0.21% : 0.000001s : 8: predicate.elim_not_effective 0.44% : 0.000002s : 8: predicate.elim_shapecalc_of_broadcastargs 1.19% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.17% : 0.000005s : 29: predicate.environ_get_add_eliminate 1.12% : 0.000005s : 29: predicate.environ_get_depend_swap 1.98% : 0.000008s : 45: predicate.environ_get_eliminate 1.13% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.08% : 0.000004s : 26: predicate.exchange_switch_depend_value 1.69% : 0.000007s : 26: predicate.float_depend_g_call 0.67% : 0.000003s : 16: predicate.float_environ_get_switch 1.15% : 0.000005s : 24: predicate.float_tuple_getitem_switch 0.21% : 0.000001s : 8: predicate.fold_const_symbol 0.88% : 0.000004s : 16: predicate.get_grad_eliminate 0.27% : 0.000001s : 8: predicate.graph_param_transform 0.68% : 0.000003s : 16: predicate.incorporate_call 0.64% : 0.000003s : 16: predicate.incorporate_call_switch 6.06% : 0.000024s : 95: predicate.inline 0.88% : 0.000004s : 16: predicate.inline_without_move 0.42% : 0.000002s : 16: predicate.j_node_and_user_rematch 0.91% : 0.000004s : 16: predicate.less_batch_normalization 1.83% : 0.000007s : 37: predicate.list_to_tuple_eliminator_ 2.26% : 0.000009s : 58: predicate.load_eliminater 0.84% : 0.000003s : 8: predicate.loop_unroll_after_grad 1.53% : 0.000006s : 36: predicate.loop_unroll_before_grad 1.77% : 0.000007s : 37: predicate.make_slice_get_slice_eliminator 0.68% : 0.000003s : 16: predicate.merge_addn 0.66% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.70% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.86% : 0.000003s : 21: predicate.minmaximum_grad 0.87% : 0.000003s : 8: predicate.mutable_eliminate 0.38% : 0.000002s : 8: predicate.opt_reshape 0.50% : 0.000002s : 8: predicate.parallel_virtual_node 1.38% : 0.000006s : 26: predicate.partial_defer_inline 1.31% : 0.000005s : 29: predicate.partial_eliminate 0.87% : 0.000003s : 21: predicate.print_const_string_wrapper 0.73% : 0.000003s : 16: predicate.reduce_all_const_elim 1.11% : 0.000004s : 21: predicate.reduce_eliminate 2.32% : 0.000009s : 58: predicate.redundant_stop_gradient_eliminater 0.41% : 0.000002s : 16: predicate.remove_not_recompute_node 1.32% : 0.000005s : 37: predicate.replace_applicator 0.42% : 0.000002s : 16: predicate.replace_old_param 0.26% : 0.000001s : 8: predicate.reset_defer_inline 0.95% : 0.000004s : 21: predicate.reshape_eliminate 0.72% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.44% : 0.000002s : 8: predicate.row_tensor_eliminate 1.16% : 0.000005s : 16: predicate.same_eliminate 0.47% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.88% : 0.000004s : 16: predicate.shard_identity_eliminate 0.83% : 0.000003s : 16: predicate.special_op_eliminate 0.98% : 0.000004s : 16: predicate.specialize_transform 1.11% : 0.000004s : 16: predicate.split_environ_get_set_with_tuple_value 0.79% : 0.000003s : 16: predicate.stack_unstack_eliminate 0.37% : 0.000001s : 8: predicate.switch_call_monad_eliminater 1.13% : 0.000005s : 26: predicate.switch_defer_inline 1.82% : 0.000007s : 42: predicate.switch_layer_defer_inline 4.15% : 0.000017s : 86: predicate.switch_simplify 0.86% : 0.000003s : 21: predicate.tile_eliminate 0.96% : 0.000004s : 21: predicate.transpose_eliminate 1.87% : 0.000007s : 37: predicate.tuple_list_convert_item_index_to_positive 1.89% : 0.000008s : 37: predicate.tuple_list_get_item_const_eliminator 1.63% : 0.000007s : 37: predicate.tuple_list_get_item_depend_reorder 3.53% : 0.000014s : 53: predicate.tuple_list_get_item_eliminator 1.79% : 0.000007s : 37: predicate.tuple_list_get_set_item_eliminator 2.63% : 0.000011s : 53: predicate.tuple_list_set_item_eliminator 1.56% : 0.000006s : 37: predicate.tuple_to_list_eliminator_ 2.26% : 0.000009s : 58: predicate.updatestate_pure_node_eliminater 3.06% : 0.000012s : 74: predicate.updatestate_useless_node_eliminater 0.41% : 0.000002s : 8: predicate.value_based_eliminate 0.75% : 0.000003s : 16: predicate.virtual_dataset_eliminate 0.79% : 0.000003s : 16: predicate.virtual_output_eliminate 0.31% : 0.000001s : 8: predicate.virtual_view_grad_eliminate 0.42% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.006004 32 68.73% : 0.004126s : 25: func_graph_cloner_run.FuncGraphClonerGraph 31.27% : 0.001878s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.543110 192 0.00% : 0.000004s : 1: ForceFp32Comm 3.03% : 0.046778s : 1: add_attr 3.03% : 0.046759s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000112s : 1: add_recomputation 0.00% : 0.000005s : 1: assign_add_opt 0.01% : 0.000107s : 1: auto_monad 0.00% : 0.000035s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.05% : 0.000845s : 1: bootstrap 0.00% : 0.000042s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000027s : 1: control_data_broadcast_order 0.00% : 0.000019s : 1: convert_after_rewriter 0.00% : 0.000047s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: detach_backward 0.00% : 0.000022s : 1: environ_conv 0.00% : 0.000036s : 1: event_method 0.00% : 0.000007s : 1: full_micro_interleaved_order_control 0.00% : 0.000007s : 1: get_jit_bprop_graph 0.00% : 0.000011s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000011s : 1: inline 0.00% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000008s : 1: label_micro_interleaved_index 0.03% : 0.000522s : 1: loop_unroll 0.00% : 0.000005s : 1: merge_cast_opt 0.00% : 0.000006s : 1: micro_interleaved_order_control 0.05% : 0.000790s : 1: mutable_eliminate 0.00% : 0.000011s : 1: offloading_packed_experts 0.00% : 0.000022s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000026s : 1: opt.transform.mutable_eliminate 0.16% : 0.002518s : 78: opt.transform.opt_a 0.00% : 0.000061s : 1: opt.transform.opt_after_cconv 0.00% : 0.000049s : 1: opt.transform.opt_after_jit_grad 0.02% : 0.000300s : 28: opt.transform.opt_b 0.01% : 0.000190s : 2: opt.transform.opt_trans_graph 0.01% : 0.000104s : 4: opt.transform.symbol_engine_opt 0.70% : 0.010870s : 1: opt_a 0.01% : 0.000178s : 1: opt_after_cconv 0.04% : 0.000644s : 1: opt_after_jit_grad 0.03% : 0.000467s : 1: opt_b 0.94% : 0.014556s : 1: optimize 0.00% : 0.000031s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000012s : 1: order_py_execute_after_rewriter 0.00% : 0.000038s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000011s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000011s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: overlap_recompute_comm 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000005s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000050s : 1: pre_auto_parallel 0.00% : 0.000039s : 1: py_interpret_to_execute 0.00% : 0.000034s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.00% : 0.000067s : 1: remove_dup_value 0.28% : 0.004285s : 1: renormalize.infer 0.14% : 0.002105s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000009s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000304s : 1: rewriter_after_opt_a 0.01% : 0.000102s : 1: rewriter_before_opt_a 0.00% : 0.000006s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000004s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000013s : 1: swap_dp_allreduce_reducescatter 0.02% : 0.000337s : 1: symbol_engine_optimizer 0.01% : 0.000230s : 1: tuple_transform 91.31% : 1.409075s : 1: type_inference . [hook] pytest_runtest_teardown:test_swiglu[True-float32--1-shape1] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[True-float32--1-shape1],max_mem:100.0M [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:38:08.492.798 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 1.63193, [21] [bootstrap]: 0.00075692 [type_inference]: 1.54275 [event_method]: 2.738e-05 [auto_monad]: 9.957e-05 [graph_reusing]: 6.68e-06 [inline]: 3.61999e-06 [add_attr]: 0.00639058, [1] [add_attr_with_inline]: 0.00637108, [1] [Cycle 1]: 8.531e-05, [2] [tag_attr]: 2.764e-05 [meta_addattr_fg_expand]: 7.36999e-06 [parallel-infer-symbol]: 4e-06 [pre_auto_parallel]: 5.162e-05 [insert-virtual-dataset]: 3.31001e-06 [parallel-infer-symbol-second]: 7.50006e-07 [dataset_repeat_opt]: 2.37001e-06 [pipeline_split]: 1.79e-06 [optimize]: 0.0807269, [53] [py_interpret_to_execute]: 3.902e-05 [rewriter_before_opt_a]: 0.00010724 [opt_a]: 0.0767899, [2] [Cycle 1]: 0.0752927, [45] [expand_dump_flag]: 4.02998e-06 [switch_simplify]: 4.822e-05 [loop_unroll]: 3.302e-05 [a_1]: 0.00116853 [with_stream_mark]: 2.987e-05 [recompute_prepare]: 2.004e-05 [updatestate_depend_eliminate]: 1.025e-05 [updatestate_assign_eliminate]: 7.31999e-06 [updatestate_loads_eliminate]: 6.54001e-06 [parameter_eliminate]: 2.27001e-06 [a_2]: 0.00022698 [accelerated_algorithm]: 1.621e-05 [shard]: 2.72001e-06 [meta_shard_fg_expand]: 3.93001e-06 [shard_inline]: 1.412e-05 [merge_send_recv]: 1.491e-05 [auto_parallel]: 1.284e-05 [parallel]: 3.409e-05 [flash_sp]: 1.166e-05 [merge_comm]: 8.14002e-06 [allreduce_fusion]: 7.68001e-06 [matmul_add_comm_reduction]: 1.628e-05 [allreduce_slice_to_reducescatter]: 6.79982e-07 [virtual_shard_identity]: 1.829e-05 [virtual_dataset]: 1.515e-05 [get_grad_eliminate_]: 1.483e-05 [virtual_output]: 1.507e-05 [merge_forward]: 7.58999e-06 [cell_reuse_recompute_pass]: 1.55001e-06 [offload_activation]: 1.835e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.68e-05 [merge_recompute_call_nodes]: 1.54998e-06 [before_grad]: 2.544e-05 [set_forward_comm_id_for_comm_node_pass]: 8.45001e-06 [meta_fg_expand]: 6.03998e-06 [flash_sp_send_recv_attached]: 3.53e-06 [receive_attached]: 2.24001e-06 [after_resolve]: 1.937e-05 [a_after_grad]: 2.332e-05 [renormalize]: 0.0714403 [add_forward_monad_depend]: 1.488e-05 [auto_monad_grad]: 4.12e-06 [auto_monad_eliminator]: 3.865e-05 [cse]: 0.00135088 [a_3]: 0.0001294 [Cycle 2]: 0.00147861, [45] [expand_dump_flag]: 5.10999e-06 [switch_simplify]: 1.714e-05 [loop_unroll]: 1.471e-05 [a_1]: 0.00039579 [with_stream_mark]: 3.761e-05 [recompute_prepare]: 1.52e-05 [updatestate_depend_eliminate]: 9.82999e-06 [updatestate_assign_eliminate]: 6.25002e-06 [updatestate_loads_eliminate]: 5.55001e-06 [parameter_eliminate]: 2.99999e-06 [a_2]: 0.000182 [accelerated_algorithm]: 1.478e-05 [shard]: 2.92002e-06 [meta_shard_fg_expand]: 5.22e-06 [shard_inline]: 1.218e-05 [merge_send_recv]: 1.429e-05 [auto_parallel]: 1.495e-05 [parallel]: 1.213e-05 [flash_sp]: 5.23002e-06 [merge_comm]: 7.25998e-06 [allreduce_fusion]: 6.56e-06 [matmul_add_comm_reduction]: 1.714e-05 [allreduce_slice_to_reducescatter]: 9.30013e-07 [virtual_shard_identity]: 1.432e-05 [virtual_dataset]: 1.206e-05 [get_grad_eliminate_]: 1.262e-05 [virtual_output]: 1.16e-05 [merge_forward]: 7.21001e-06 [cell_reuse_recompute_pass]: 3.21999e-06 [offload_activation]: 1.73e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.599e-05 [merge_recompute_call_nodes]: 1.63002e-06 [before_grad]: 2.183e-05 [set_forward_comm_id_for_comm_node_pass]: 8.42e-06 [meta_fg_expand]: 5.74e-06 [flash_sp_send_recv_attached]: 2.37999e-06 [receive_attached]: 2.59001e-06 [after_resolve]: 1.834e-05 [a_after_grad]: 1.954e-05 [renormalize]: 8.00064e-08 [add_forward_monad_depend]: 3.85998e-06 [auto_monad_grad]: 3.04999e-06 [auto_monad_eliminator]: 2.595e-05 [cse]: 0.00012067 [a_3]: 8.601e-05 [py_interpret_to_execute_after_opt_a]: 2.856e-05 [slice_cell_reuse_recomputed_activation]: 2.27001e-06 [rewriter_after_opt_a]: 0.0002953 [convert_after_rewriter]: 1.485e-05 [order_py_execute_after_rewriter]: 9.61e-06 [mutable_eliminate]: 0.00082212 [opt_b]: 0.00050547, [1] [Cycle 1]: 0.00049539, [7] [b_1]: 0.00033069 [b_2]: 1.434e-05 [updatestate_depend_eliminate]: 1.347e-05 [updatestate_assign_eliminate]: 5.90002e-06 [updatestate_loads_eliminate]: 6.06998e-06 [renormalize]: 8.99978e-07 [cse]: 8.077e-05 [optimize_parallel_all_gather_comm]: 3.44e-05 [overlap_param_gather]: 2.09e-06 [cconv]: 4.289e-05 [loop_unroll]: 0.00063472 [opt_after_cconv]: 0.00019418, [1] [Cycle 1]: 0.00018423, [7] [c_1]: 6.623e-05 [parameter_eliminate]: 6.73e-06 [updatestate_depend_eliminate]: 1.24e-05 [updatestate_assign_eliminate]: 5.51e-06 [updatestate_loads_eliminate]: 4.97e-06 [cse]: 4.981e-05 [renormalize]: 8.10018e-07 [remove_dup_value]: 6.775e-05 [tuple_transform]: 0.00018316, [1] [Cycle 1]: 0.00017752, [4] [d_1]: 0.00013674 [none_parameter_eliminate]: 2.27999e-06 [renormalize]: 3.09985e-07 [switch_simplify]: 1.428e-05 [partial_unused_args_eliminate]: 2.21998e-06 [add_recomputation]: 0.00010133 [cse_after_recomputation]: 4.643e-05, [1] [Cycle 1]: 4.006e-05, [1] [cse]: 3.295e-05 [environ_conv]: 1.86e-05 [swap_dp_allreduce_reducescatter]: 1.206e-05 [bias_add_comm_swap]: 3.31999e-06 [label_micro_interleaved_index]: 7.37002e-06 [label_fine_grained_interleaved_index]: 3.21001e-06 [merge_cast_opt]: 1.51002e-06 [slice_recompute_activation]: 2.34001e-06 [micro_interleaved_order_control]: 2.61e-06 [assign_add_opt]: 1.27e-06 [ForceFp32Comm]: 9.5999e-07 [remove_cast_before_assign_add]: 1.80001e-06 [full_micro_interleaved_order_control]: 2.57001e-06 [reorder_send_recv_between_fp_bp]: 3.26001e-06 [comm_op_add_attrs]: 1.13001e-06 [add_comm_op_reuse_tag]: 1.12e-06 [interleave_split_concat_branches]: 1.50001e-06 [interleave_parallel_branches]: 1.19e-06 [overlap_opt_shard_in_pipeline]: 1.44e-06 [overlap_opt_shard_grad_in_pipeline]: 2.22999e-06 [control_data_broadcast_order]: 2.365e-05 [grouped_pairwise_exchange_alltoall]: 1.67001e-06 [offloading_packed_experts]: 7.11999e-06 [overlap_recompute_and_grad_model_parallel]: 8.13001e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.36998e-06 [overlap_recompute_allgather_and_fa_grad]: 1.67001e-06 [overlap_recompute_comm]: 2.78e-06 [overlap_grad_ring_attention]: 5.85002e-06 [overlap_grad_flash_sp]: 3.558e-05 [begin_end_overlap_inline]: 7.40023e-07 [split_matmul_comm_elemetwise]: 2.59001e-06 [split_layernorm_comm]: 2.09999e-06 [handle_group_info]: 9.5999e-07 [symbol_engine_optimizer]: 0.00034957, [1] [Cycle 1]: 0.00034369, [6] [build]: 0.00018478 [elim_shapecalc]: 2.177e-05 [elim_not_effective]: 3.66e-05 [opt_reshape]: 1.586e-05 [fold_const_symbol]: 3.879e-05 [renormalize]: 5.10016e-07 [detach_backward]: 2.93e-06 [pipeline_parallel_scheduler]: 1.86998e-06 [auto_monad_reorder]: 2.993e-05 [get_jit_bprop_graph]: 4.42e-06 [rewriter_after_jit_bprop_graph]: 6.81001e-06 [opt_after_jit_grad]: 0.0007558 [validate]: 8.505e-05 Sums bootstrap : 0.000757s : 0.05% type_inference : 1.542746s : 94.99% event_method : 0.000027s : 0.00% auto_monad : 0.000100s : 0.01% graph_reusing : 0.000007s : 0.00% inline : 0.000004s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000028s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000007s : 0.00% parallel-infer-symbol : 0.000004s : 0.00% pre_auto_parallel : 0.000052s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000039s : 0.00% optimize.rewriter_before_opt_a : 0.000107s : 0.01% optimize.opt_a.expand_dump_flag : 0.000009s : 0.00% optimize.opt_a.switch_simplify : 0.000065s : 0.00% optimize.opt_a.loop_unroll : 0.000048s : 0.00% optimize.opt_a.a_1 : 0.001564s : 0.10% optimize.opt_a.with_stream_mark : 0.000067s : 0.00% optimize.opt_a.recompute_prepare : 0.000035s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000020s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000014s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000012s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000409s : 0.03% optimize.opt_a.accelerated_algorithm : 0.000031s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000009s : 0.00% optimize.opt_a.shard_inline : 0.000026s : 0.00% optimize.opt_a.merge_send_recv : 0.000029s : 0.00% optimize.opt_a.auto_parallel : 0.000028s : 0.00% optimize.opt_a.parallel : 0.000046s : 0.00% optimize.opt_a.flash_sp : 0.000017s : 0.00% optimize.opt_a.merge_comm : 0.000015s : 0.00% optimize.opt_a.allreduce_fusion : 0.000014s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000033s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000033s : 0.00% optimize.opt_a.virtual_dataset : 0.000027s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000027s : 0.00% optimize.opt_a.virtual_output : 0.000027s : 0.00% optimize.opt_a.merge_forward : 0.000015s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.offload_activation : 0.000036s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000053s : 0.00% optimize.opt_a.merge_recompute_call_nodes : 0.000003s : 0.00% optimize.opt_a.before_grad : 0.000047s : 0.00% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000017s : 0.00% optimize.opt_a.meta_fg_expand : 0.000012s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000006s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000038s : 0.00% optimize.opt_a.a_after_grad : 0.000043s : 0.00% optimize.opt_a.renormalize : 0.071440s : 4.40% optimize.opt_a.add_forward_monad_depend : 0.000019s : 0.00% optimize.opt_a.auto_monad_grad : 0.000007s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000065s : 0.00% optimize.opt_a.cse : 0.001472s : 0.09% optimize.opt_a.a_3 : 0.000215s : 0.01% optimize.py_interpret_to_execute_after_opt_a : 0.000029s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000295s : 0.02% optimize.convert_after_rewriter : 0.000015s : 0.00% optimize.order_py_execute_after_rewriter : 0.000010s : 0.00% optimize.mutable_eliminate : 0.000822s : 0.05% optimize.opt_b.b_1 : 0.000331s : 0.02% optimize.opt_b.b_2 : 0.000014s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000013s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000081s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000034s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000043s : 0.00% optimize.loop_unroll : 0.000635s : 0.04% optimize.opt_after_cconv.c_1 : 0.000066s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000007s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000012s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000050s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000068s : 0.00% optimize.tuple_transform.d_1 : 0.000137s : 0.01% optimize.tuple_transform.none_parameter_eliminate : 0.000002s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000014s : 0.00% optimize.partial_unused_args_eliminate : 0.000002s : 0.00% optimize.add_recomputation : 0.000101s : 0.01% optimize.cse_after_recomputation.cse : 0.000033s : 0.00% optimize.environ_conv : 0.000019s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000012s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000007s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000001s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000002s : 0.00% optimize.full_micro_interleaved_order_control : 0.000003s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000024s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000007s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000008s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000003s : 0.00% optimize.overlap_grad_ring_attention : 0.000006s : 0.00% optimize.overlap_grad_flash_sp : 0.000036s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000003s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000185s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000022s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000037s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000016s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000039s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% detach_backward : 0.000003s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000030s : 0.00% get_jit_bprop_graph : 0.000004s : 0.00% rewriter_after_jit_bprop_graph : 0.000007s : 0.00% opt_after_jit_grad : 0.000756s : 0.05% validate : 0.000085s : 0.01% Time group info: ------[substitution.] 0.000531 111 9.31% : 0.000049s : 6: substitution.arithmetic_simplify 1.14% : 0.000006s : 6: substitution.elim_not_effective 2.45% : 0.000013s : 6: substitution.float_tuple_getitem_switch 3.83% : 0.000020s : 6: substitution.fold_const_symbol 1.94% : 0.000010s : 8: substitution.graph_param_transform 43.57% : 0.000231s : 5: substitution.inline 1.76% : 0.000009s : 12: substitution.j_node_and_user_rematch 2.19% : 0.000012s : 4: substitution.minmaximum_grad 2.02% : 0.000011s : 12: substitution.remove_not_recompute_node 1.19% : 0.000006s : 2: substitution.replace_old_param 7.12% : 0.000038s : 8: substitution.tuple_list_convert_item_index_to_positive 3.07% : 0.000016s : 8: substitution.tuple_list_get_item_const_eliminator 4.74% : 0.000025s : 8: substitution.tuple_list_get_item_depend_reorder 11.20% : 0.000059s : 12: substitution.tuple_list_get_item_eliminator 4.47% : 0.000024s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 1.542624 2 99.74% : 1.538575s : 1: type_inference.infer 0.26% : 0.004048s : 1: type_inference.specialize ------[replace.] 0.000055 5 100.00% : 0.000055s : 5: replace.inline ------[match.] 0.000227 5 100.00% : 0.000227s : 5: match.inline ------[predicate.] 0.000435 2113 0.92% : 0.000004s : 21: predicate.accumulaten_eliminater 0.85% : 0.000004s : 8: predicate.ad_related_special_op_eliminate 0.64% : 0.000003s : 16: predicate.addn_check_dump 0.98% : 0.000004s : 21: predicate.addn_zero_filter 0.85% : 0.000004s : 21: predicate.adjust_all_reduce_mul_add 2.50% : 0.000011s : 37: predicate.arithmetic_simplify 0.93% : 0.000004s : 21: predicate.cast_eliminate 0.66% : 0.000003s : 16: predicate.check_bprop_eliminate 0.64% : 0.000003s : 16: predicate.compare_switch_simplify 0.20% : 0.000001s : 8: predicate.const_output_eliminate 0.70% : 0.000003s : 16: predicate.depend_value_elim 0.89% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 0.97% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.85% : 0.000004s : 21: predicate.dict_set_item_eliminator 1.01% : 0.000004s : 16: predicate.dumpgradient_eliminate 0.25% : 0.000001s : 8: predicate.elim_not_effective 0.39% : 0.000002s : 8: predicate.elim_shapecalc_of_broadcastargs 1.18% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.14% : 0.000005s : 29: predicate.environ_get_add_eliminate 1.15% : 0.000005s : 29: predicate.environ_get_depend_swap 1.92% : 0.000008s : 45: predicate.environ_get_eliminate 1.15% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.12% : 0.000005s : 26: predicate.exchange_switch_depend_value 1.72% : 0.000007s : 26: predicate.float_depend_g_call 0.64% : 0.000003s : 16: predicate.float_environ_get_switch 1.11% : 0.000005s : 24: predicate.float_tuple_getitem_switch 0.19% : 0.000001s : 8: predicate.fold_const_symbol 0.75% : 0.000003s : 16: predicate.get_grad_eliminate 0.23% : 0.000001s : 8: predicate.graph_param_transform 0.67% : 0.000003s : 16: predicate.incorporate_call 0.59% : 0.000003s : 16: predicate.incorporate_call_switch 5.62% : 0.000024s : 95: predicate.inline 0.93% : 0.000004s : 16: predicate.inline_without_move 0.35% : 0.000002s : 16: predicate.j_node_and_user_rematch 1.01% : 0.000004s : 16: predicate.less_batch_normalization 1.82% : 0.000008s : 37: predicate.list_to_tuple_eliminator_ 2.29% : 0.000010s : 58: predicate.load_eliminater 1.01% : 0.000004s : 8: predicate.loop_unroll_after_grad 1.53% : 0.000007s : 36: predicate.loop_unroll_before_grad 1.78% : 0.000008s : 37: predicate.make_slice_get_slice_eliminator 0.69% : 0.000003s : 16: predicate.merge_addn 0.75% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.72% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.86% : 0.000004s : 21: predicate.minmaximum_grad 1.23% : 0.000005s : 8: predicate.mutable_eliminate 0.42% : 0.000002s : 8: predicate.opt_reshape 0.47% : 0.000002s : 8: predicate.parallel_virtual_node 1.65% : 0.000007s : 26: predicate.partial_defer_inline 1.27% : 0.000006s : 29: predicate.partial_eliminate 0.94% : 0.000004s : 21: predicate.print_const_string_wrapper 0.72% : 0.000003s : 16: predicate.reduce_all_const_elim 1.27% : 0.000006s : 21: predicate.reduce_eliminate 2.36% : 0.000010s : 58: predicate.redundant_stop_gradient_eliminater 0.35% : 0.000002s : 16: predicate.remove_not_recompute_node 1.26% : 0.000005s : 37: predicate.replace_applicator 0.44% : 0.000002s : 16: predicate.replace_old_param 0.29% : 0.000001s : 8: predicate.reset_defer_inline 0.97% : 0.000004s : 21: predicate.reshape_eliminate 0.70% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.49% : 0.000002s : 8: predicate.row_tensor_eliminate 1.14% : 0.000005s : 16: predicate.same_eliminate 0.47% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.79% : 0.000003s : 16: predicate.shard_identity_eliminate 0.79% : 0.000003s : 16: predicate.special_op_eliminate 0.83% : 0.000004s : 16: predicate.specialize_transform 1.02% : 0.000004s : 16: predicate.split_environ_get_set_with_tuple_value 0.77% : 0.000003s : 16: predicate.stack_unstack_eliminate 0.38% : 0.000002s : 8: predicate.switch_call_monad_eliminater 1.21% : 0.000005s : 26: predicate.switch_defer_inline 1.83% : 0.000008s : 42: predicate.switch_layer_defer_inline 4.00% : 0.000017s : 86: predicate.switch_simplify 0.92% : 0.000004s : 21: predicate.tile_eliminate 0.92% : 0.000004s : 21: predicate.transpose_eliminate 1.85% : 0.000008s : 37: predicate.tuple_list_convert_item_index_to_positive 1.88% : 0.000008s : 37: predicate.tuple_list_get_item_const_eliminator 1.66% : 0.000007s : 37: predicate.tuple_list_get_item_depend_reorder 3.33% : 0.000014s : 53: predicate.tuple_list_get_item_eliminator 1.86% : 0.000008s : 37: predicate.tuple_list_get_set_item_eliminator 2.70% : 0.000012s : 53: predicate.tuple_list_set_item_eliminator 1.57% : 0.000007s : 37: predicate.tuple_to_list_eliminator_ 2.33% : 0.000010s : 58: predicate.updatestate_pure_node_eliminater 3.02% : 0.000013s : 74: predicate.updatestate_useless_node_eliminater 0.36% : 0.000002s : 8: predicate.value_based_eliminate 0.86% : 0.000004s : 16: predicate.virtual_dataset_eliminate 0.73% : 0.000003s : 16: predicate.virtual_output_eliminate 0.35% : 0.000002s : 8: predicate.virtual_view_grad_eliminate 0.44% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.007052 32 64.18% : 0.004526s : 25: func_graph_cloner_run.FuncGraphClonerGraph 35.82% : 0.002526s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.793558 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.36% : 0.006398s : 1: add_attr 0.36% : 0.006376s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000106s : 1: add_recomputation 0.00% : 0.000005s : 1: assign_add_opt 0.01% : 0.000106s : 1: auto_monad 0.00% : 0.000034s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000008s : 1: bias_add_comm_swap 0.04% : 0.000806s : 1: bootstrap 0.00% : 0.000047s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000027s : 1: control_data_broadcast_order 0.00% : 0.000019s : 1: convert_after_rewriter 0.00% : 0.000050s : 1: cse_after_recomputation 0.00% : 0.000005s : 1: dataset_repeat_opt 0.00% : 0.000009s : 1: detach_backward 0.00% : 0.000023s : 1: environ_conv 0.00% : 0.000036s : 1: event_method 0.00% : 0.000006s : 1: full_micro_interleaved_order_control 0.00% : 0.000008s : 1: get_jit_bprop_graph 0.00% : 0.000011s : 1: graph_reusing 0.00% : 0.000004s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000005s : 1: handle_group_info 0.00% : 0.000007s : 1: inline 0.00% : 0.000007s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000004s : 1: interleave_split_concat_branches 0.00% : 0.000007s : 1: label_fine_grained_interleaved_index 0.00% : 0.000011s : 1: label_micro_interleaved_index 0.04% : 0.000648s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.05% : 0.000836s : 1: mutable_eliminate 0.00% : 0.000010s : 1: offloading_packed_experts 0.00% : 0.000030s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000034s : 1: opt.transform.mutable_eliminate 0.14% : 0.002557s : 78: opt.transform.opt_a 0.00% : 0.000065s : 1: opt.transform.opt_after_cconv 0.00% : 0.000053s : 1: opt.transform.opt_after_jit_grad 0.02% : 0.000307s : 28: opt.transform.opt_b 0.01% : 0.000148s : 2: opt.transform.opt_trans_graph 0.01% : 0.000107s : 4: opt.transform.symbol_engine_opt 4.28% : 0.076794s : 1: opt_a 0.01% : 0.000198s : 1: opt_after_cconv 0.04% : 0.000772s : 1: opt_after_jit_grad 0.03% : 0.000510s : 1: opt_b 4.50% : 0.080734s : 1: optimize 0.00% : 0.000039s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000013s : 1: order_py_execute_after_rewriter 0.00% : 0.000040s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000010s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000006s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000011s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: overlap_recompute_comm 0.00% : 0.000009s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000006s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000056s : 1: pre_auto_parallel 0.00% : 0.000044s : 1: py_interpret_to_execute 0.00% : 0.000033s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000005s : 1: remove_cast_before_assign_add 0.00% : 0.000073s : 1: remove_dup_value 3.82% : 0.068585s : 1: renormalize.infer 0.16% : 0.002838s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000011s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000305s : 1: rewriter_after_opt_a 0.01% : 0.000113s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000006s : 1: split_matmul_comm_elemetwise 0.00% : 0.000016s : 1: swap_dp_allreduce_reducescatter 0.02% : 0.000353s : 1: symbol_engine_optimizer 0.01% : 0.000186s : 1: tuple_transform 86.02% : 1.542781s : 1: type_inference . [hook] pytest_runtest_teardown:test_swiglu[True-float16--1-shape0] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[True-float16--1-shape0],max_mem:100.0M [WARNING] ME(161016:281473848516400,MainProcess):2026-01-29-17:38:13.130.807 [mindspore/context.py:1334] For 'context.set_context', the parameter 'device_target' will be deprecated and removed in a future version. Please use the api mindspore.set_device() instead. TotalTime = 1.75133, [21] [bootstrap]: 0.00094572 [type_inference]: 1.63785 [event_method]: 2.269e-05 [auto_monad]: 9.405e-05 [graph_reusing]: 7.03998e-06 [inline]: 2.66e-06 [add_attr]: 0.00608181, [1] [add_attr_with_inline]: 0.00606603, [1] [Cycle 1]: 7.411e-05, [2] [tag_attr]: 2.368e-05 [meta_addattr_fg_expand]: 6.41e-06 [parallel-infer-symbol]: 4.58999e-06 [pre_auto_parallel]: 4.167e-05 [insert-virtual-dataset]: 2.61999e-06 [parallel-infer-symbol-second]: 7.59988e-07 [dataset_repeat_opt]: 1.86998e-06 [pipeline_split]: 1.74998e-06 [optimize]: 0.104651, [53] [py_interpret_to_execute]: 2.924e-05 [rewriter_before_opt_a]: 9.148e-05 [opt_a]: 0.100032, [2] [Cycle 1]: 0.0984637, [45] [expand_dump_flag]: 3.64002e-06 [switch_simplify]: 4.7e-05 [loop_unroll]: 3.058e-05 [a_1]: 0.00101985 [with_stream_mark]: 2.131e-05 [recompute_prepare]: 1.646e-05 [updatestate_depend_eliminate]: 7.63001e-06 [updatestate_assign_eliminate]: 7.16999e-06 [updatestate_loads_eliminate]: 5.82001e-06 [parameter_eliminate]: 1.93002e-06 [a_2]: 0.00020376 [accelerated_algorithm]: 1.454e-05 [shard]: 2.64001e-06 [meta_shard_fg_expand]: 3.16001e-06 [shard_inline]: 1.259e-05 [merge_send_recv]: 1.272e-05 [auto_parallel]: 9.99001e-06 [parallel]: 2.822e-05 [flash_sp]: 9.92999e-06 [merge_comm]: 7.27002e-06 [allreduce_fusion]: 6.79999e-06 [matmul_add_comm_reduction]: 1.634e-05 [allreduce_slice_to_reducescatter]: 9.79984e-07 [virtual_shard_identity]: 1.488e-05 [virtual_dataset]: 1.37e-05 [get_grad_eliminate_]: 1.305e-05 [virtual_output]: 1.346e-05 [merge_forward]: 8.33999e-06 [cell_reuse_recompute_pass]: 1.35999e-06 [offload_activation]: 1.655e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.769e-05 [merge_recompute_call_nodes]: 1.66e-06 [before_grad]: 2.275e-05 [set_forward_comm_id_for_comm_node_pass]: 7.65e-06 [meta_fg_expand]: 5.67001e-06 [flash_sp_send_recv_attached]: 3.06999e-06 [receive_attached]: 2.11998e-06 [after_resolve]: 1.824e-05 [a_after_grad]: 2.103e-05 [renormalize]: 0.0959961 [add_forward_monad_depend]: 1.388e-05 [auto_monad_grad]: 3.01999e-06 [auto_monad_eliminator]: 3.964e-05 [cse]: 0.00027669 [a_3]: 0.00011765 [Cycle 2]: 0.00155098, [45] [expand_dump_flag]: 4.00998e-06 [switch_simplify]: 1.741e-05 [loop_unroll]: 1.364e-05 [a_1]: 0.0005473 [with_stream_mark]: 3.167e-05 [recompute_prepare]: 1.409e-05 [updatestate_depend_eliminate]: 9.52001e-06 [updatestate_assign_eliminate]: 5.86e-06 [updatestate_loads_eliminate]: 5.57999e-06 [parameter_eliminate]: 2.96999e-06 [a_2]: 0.0001867 [accelerated_algorithm]: 1.376e-05 [shard]: 3.22002e-06 [meta_shard_fg_expand]: 4.38001e-06 [shard_inline]: 1.268e-05 [merge_send_recv]: 1.521e-05 [auto_parallel]: 1.436e-05 [parallel]: 1.134e-05 [flash_sp]: 4.99003e-06 [merge_comm]: 7.16999e-06 [allreduce_fusion]: 6.65002e-06 [matmul_add_comm_reduction]: 1.625e-05 [allreduce_slice_to_reducescatter]: 9.39996e-07 [virtual_shard_identity]: 1.433e-05 [virtual_dataset]: 1.208e-05 [get_grad_eliminate_]: 1.228e-05 [virtual_output]: 1.191e-05 [merge_forward]: 7.98001e-06 [cell_reuse_recompute_pass]: 3.7e-06 [offload_activation]: 1.758e-05 [cell_reuse_handle_not_recompute_node_pass]: 2.447e-05 [merge_recompute_call_nodes]: 1.82999e-06 [before_grad]: 2.188e-05 [set_forward_comm_id_for_comm_node_pass]: 7.88999e-06 [meta_fg_expand]: 6.04001e-06 [flash_sp_send_recv_attached]: 2.10002e-06 [receive_attached]: 3.03998e-06 [after_resolve]: 1.808e-05 [a_after_grad]: 2.161e-05 [renormalize]: 1.10012e-07 [add_forward_monad_depend]: 3.04999e-06 [auto_monad_grad]: 2.04999e-06 [auto_monad_eliminator]: 2.36e-05 [cse]: 5.158e-05 [a_3]: 7.919e-05 [py_interpret_to_execute_after_opt_a]: 2.644e-05 [slice_cell_reuse_recomputed_activation]: 2.38002e-06 [rewriter_after_opt_a]: 0.00030791 [convert_after_rewriter]: 1.535e-05 [order_py_execute_after_rewriter]: 1.026e-05 [mutable_eliminate]: 0.00085637 [opt_b]: 0.00050466, [1] [Cycle 1]: 0.000495, [7] [b_1]: 0.00033282 [b_2]: 1.506e-05 [updatestate_depend_eliminate]: 1.586e-05 [updatestate_assign_eliminate]: 5.76998e-06 [updatestate_loads_eliminate]: 6.07001e-06 [renormalize]: 1.27e-06 [cse]: 6.985e-05 [optimize_parallel_all_gather_comm]: 3.148e-05 [overlap_param_gather]: 2.44999e-06 [cconv]: 4.334e-05 [loop_unroll]: 0.00064721 [opt_after_cconv]: 0.00019337, [1] [Cycle 1]: 0.00018416, [7] [c_1]: 6.551e-05 [parameter_eliminate]: 6.21998e-06 [updatestate_depend_eliminate]: 1.225e-05 [updatestate_assign_eliminate]: 5.37999e-06 [updatestate_loads_eliminate]: 4.90999e-06 [cse]: 5.143e-05 [renormalize]: 7.90023e-07 [remove_dup_value]: 0.00013269 [tuple_transform]: 0.00071187, [1] [Cycle 1]: 0.00070451, [4] [d_1]: 0.00063933 [none_parameter_eliminate]: 3.09001e-06 [renormalize]: 3.00002e-07 [switch_simplify]: 1.801e-05 [partial_unused_args_eliminate]: 2.93e-06 [add_recomputation]: 0.00012144 [cse_after_recomputation]: 5.688e-05, [1] [Cycle 1]: 4.927e-05, [1] [cse]: 4.233e-05 [environ_conv]: 1.779e-05 [swap_dp_allreduce_reducescatter]: 1.04e-05 [bias_add_comm_swap]: 3.32002e-06 [label_micro_interleaved_index]: 9.62001e-06 [label_fine_grained_interleaved_index]: 2.93998e-06 [merge_cast_opt]: 1.91998e-06 [slice_recompute_activation]: 2.44001e-06 [micro_interleaved_order_control]: 2.71e-06 [assign_add_opt]: 1.97001e-06 [ForceFp32Comm]: 1.19003e-06 [remove_cast_before_assign_add]: 1.25001e-06 [full_micro_interleaved_order_control]: 2.43998e-06 [reorder_send_recv_between_fp_bp]: 3.21999e-06 [comm_op_add_attrs]: 1.15001e-06 [add_comm_op_reuse_tag]: 1.12e-06 [interleave_split_concat_branches]: 1.77001e-06 [interleave_parallel_branches]: 1.19e-06 [overlap_opt_shard_in_pipeline]: 1.44e-06 [overlap_opt_shard_grad_in_pipeline]: 2.41e-06 [control_data_broadcast_order]: 2.715e-05 [grouped_pairwise_exchange_alltoall]: 1.70001e-06 [offloading_packed_experts]: 8.43999e-06 [overlap_recompute_and_grad_model_parallel]: 7.93001e-06 [overlap_grad_matmul_and_grad_allreduce]: 1.34003e-06 [overlap_recompute_allgather_and_fa_grad]: 1.75001e-06 [overlap_recompute_comm]: 3.09999e-06 [overlap_grad_ring_attention]: 6.23e-06 [overlap_grad_flash_sp]: 3.405e-05 [begin_end_overlap_inline]: 5.89993e-07 [split_matmul_comm_elemetwise]: 2.36e-06 [split_layernorm_comm]: 2.42001e-06 [handle_group_info]: 1.09e-06 [symbol_engine_optimizer]: 0.00035695, [1] [Cycle 1]: 0.00035053, [6] [build]: 0.00019777 [elim_shapecalc]: 2.293e-05 [elim_not_effective]: 3.052e-05 [opt_reshape]: 1.558e-05 [fold_const_symbol]: 4.029e-05 [renormalize]: 5.60016e-07 [detach_backward]: 2.57001e-06 [pipeline_parallel_scheduler]: 1.89999e-06 [auto_monad_reorder]: 3.262e-05 [get_jit_bprop_graph]: 2.98e-06 [rewriter_after_jit_bprop_graph]: 6.02001e-06 [opt_after_jit_grad]: 0.00086551 [validate]: 9.439e-05 Sums bootstrap : 0.000946s : 0.05% type_inference : 1.637847s : 93.94% event_method : 0.000023s : 0.00% auto_monad : 0.000094s : 0.01% graph_reusing : 0.000007s : 0.00% inline : 0.000003s : 0.00% add_attr.add_attr_with_inline.tag_attr : 0.000024s : 0.00% add_attr.add_attr_with_inline.meta_addattr_fg_expand : 0.000006s : 0.00% parallel-infer-symbol : 0.000005s : 0.00% pre_auto_parallel : 0.000042s : 0.00% insert-virtual-dataset : 0.000003s : 0.00% parallel-infer-symbol-second : 0.000001s : 0.00% dataset_repeat_opt : 0.000002s : 0.00% pipeline_split : 0.000002s : 0.00% optimize.py_interpret_to_execute : 0.000029s : 0.00% optimize.rewriter_before_opt_a : 0.000091s : 0.01% optimize.opt_a.expand_dump_flag : 0.000008s : 0.00% optimize.opt_a.switch_simplify : 0.000064s : 0.00% optimize.opt_a.loop_unroll : 0.000044s : 0.00% optimize.opt_a.a_1 : 0.001567s : 0.09% optimize.opt_a.with_stream_mark : 0.000053s : 0.00% optimize.opt_a.recompute_prepare : 0.000031s : 0.00% optimize.opt_a.updatestate_depend_eliminate : 0.000017s : 0.00% optimize.opt_a.updatestate_assign_eliminate : 0.000013s : 0.00% optimize.opt_a.updatestate_loads_eliminate : 0.000011s : 0.00% optimize.opt_a.parameter_eliminate : 0.000005s : 0.00% optimize.opt_a.a_2 : 0.000390s : 0.02% optimize.opt_a.accelerated_algorithm : 0.000028s : 0.00% optimize.opt_a.shard : 0.000006s : 0.00% optimize.opt_a.meta_shard_fg_expand : 0.000008s : 0.00% optimize.opt_a.shard_inline : 0.000025s : 0.00% optimize.opt_a.merge_send_recv : 0.000028s : 0.00% optimize.opt_a.auto_parallel : 0.000024s : 0.00% optimize.opt_a.parallel : 0.000040s : 0.00% optimize.opt_a.flash_sp : 0.000015s : 0.00% optimize.opt_a.merge_comm : 0.000014s : 0.00% optimize.opt_a.allreduce_fusion : 0.000013s : 0.00% optimize.opt_a.matmul_add_comm_reduction : 0.000033s : 0.00% optimize.opt_a.allreduce_slice_to_reducescatter : 0.000002s : 0.00% optimize.opt_a.virtual_shard_identity : 0.000029s : 0.00% optimize.opt_a.virtual_dataset : 0.000026s : 0.00% optimize.opt_a.get_grad_eliminate_ : 0.000025s : 0.00% optimize.opt_a.virtual_output : 0.000025s : 0.00% optimize.opt_a.merge_forward : 0.000016s : 0.00% optimize.opt_a.cell_reuse_recompute_pass : 0.000005s : 0.00% optimize.opt_a.offload_activation : 0.000034s : 0.00% optimize.opt_a.cell_reuse_handle_not_recompute_node_pass : 0.000052s : 0.00% optimize.opt_a.merge_recompute_call_nodes : 0.000003s : 0.00% optimize.opt_a.before_grad : 0.000045s : 0.00% optimize.opt_a.set_forward_comm_id_for_comm_node_pass : 0.000016s : 0.00% optimize.opt_a.meta_fg_expand : 0.000012s : 0.00% optimize.opt_a.flash_sp_send_recv_attached : 0.000005s : 0.00% optimize.opt_a.receive_attached : 0.000005s : 0.00% optimize.opt_a.after_resolve : 0.000036s : 0.00% optimize.opt_a.a_after_grad : 0.000043s : 0.00% optimize.opt_a.renormalize : 0.095996s : 5.51% optimize.opt_a.add_forward_monad_depend : 0.000017s : 0.00% optimize.opt_a.auto_monad_grad : 0.000005s : 0.00% optimize.opt_a.auto_monad_eliminator : 0.000063s : 0.00% optimize.opt_a.cse : 0.000328s : 0.02% optimize.opt_a.a_3 : 0.000197s : 0.01% optimize.py_interpret_to_execute_after_opt_a : 0.000026s : 0.00% optimize.slice_cell_reuse_recomputed_activation : 0.000002s : 0.00% optimize.rewriter_after_opt_a : 0.000308s : 0.02% optimize.convert_after_rewriter : 0.000015s : 0.00% optimize.order_py_execute_after_rewriter : 0.000010s : 0.00% optimize.mutable_eliminate : 0.000856s : 0.05% optimize.opt_b.b_1 : 0.000333s : 0.02% optimize.opt_b.b_2 : 0.000015s : 0.00% optimize.opt_b.updatestate_depend_eliminate : 0.000016s : 0.00% optimize.opt_b.updatestate_assign_eliminate : 0.000006s : 0.00% optimize.opt_b.updatestate_loads_eliminate : 0.000006s : 0.00% optimize.opt_b.renormalize : 0.000001s : 0.00% optimize.opt_b.cse : 0.000070s : 0.00% optimize.optimize_parallel_all_gather_comm : 0.000031s : 0.00% optimize.overlap_param_gather : 0.000002s : 0.00% optimize.cconv : 0.000043s : 0.00% optimize.loop_unroll : 0.000647s : 0.04% optimize.opt_after_cconv.c_1 : 0.000066s : 0.00% optimize.opt_after_cconv.parameter_eliminate : 0.000006s : 0.00% optimize.opt_after_cconv.updatestate_depend_eliminate : 0.000012s : 0.00% optimize.opt_after_cconv.updatestate_assign_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.updatestate_loads_eliminate : 0.000005s : 0.00% optimize.opt_after_cconv.cse : 0.000051s : 0.00% optimize.opt_after_cconv.renormalize : 0.000001s : 0.00% optimize.remove_dup_value : 0.000133s : 0.01% optimize.tuple_transform.d_1 : 0.000639s : 0.04% optimize.tuple_transform.none_parameter_eliminate : 0.000003s : 0.00% optimize.tuple_transform.renormalize : 0.000000s : 0.00% optimize.tuple_transform.switch_simplify : 0.000018s : 0.00% optimize.partial_unused_args_eliminate : 0.000003s : 0.00% optimize.add_recomputation : 0.000121s : 0.01% optimize.cse_after_recomputation.cse : 0.000042s : 0.00% optimize.environ_conv : 0.000018s : 0.00% optimize.swap_dp_allreduce_reducescatter : 0.000010s : 0.00% optimize.bias_add_comm_swap : 0.000003s : 0.00% optimize.label_micro_interleaved_index : 0.000010s : 0.00% optimize.label_fine_grained_interleaved_index : 0.000003s : 0.00% optimize.merge_cast_opt : 0.000002s : 0.00% optimize.slice_recompute_activation : 0.000002s : 0.00% optimize.micro_interleaved_order_control : 0.000003s : 0.00% optimize.assign_add_opt : 0.000002s : 0.00% optimize.ForceFp32Comm : 0.000001s : 0.00% optimize.remove_cast_before_assign_add : 0.000001s : 0.00% optimize.full_micro_interleaved_order_control : 0.000002s : 0.00% optimize.reorder_send_recv_between_fp_bp : 0.000003s : 0.00% optimize.comm_op_add_attrs : 0.000001s : 0.00% optimize.add_comm_op_reuse_tag : 0.000001s : 0.00% optimize.interleave_split_concat_branches : 0.000002s : 0.00% optimize.interleave_parallel_branches : 0.000001s : 0.00% optimize.overlap_opt_shard_in_pipeline : 0.000001s : 0.00% optimize.overlap_opt_shard_grad_in_pipeline : 0.000002s : 0.00% optimize.control_data_broadcast_order : 0.000027s : 0.00% optimize.grouped_pairwise_exchange_alltoall : 0.000002s : 0.00% optimize.offloading_packed_experts : 0.000008s : 0.00% optimize.overlap_recompute_and_grad_model_parallel : 0.000008s : 0.00% optimize.overlap_grad_matmul_and_grad_allreduce : 0.000001s : 0.00% optimize.overlap_recompute_allgather_and_fa_grad : 0.000002s : 0.00% optimize.overlap_recompute_comm : 0.000003s : 0.00% optimize.overlap_grad_ring_attention : 0.000006s : 0.00% optimize.overlap_grad_flash_sp : 0.000034s : 0.00% optimize.begin_end_overlap_inline : 0.000001s : 0.00% optimize.split_matmul_comm_elemetwise : 0.000002s : 0.00% optimize.split_layernorm_comm : 0.000002s : 0.00% optimize.handle_group_info : 0.000001s : 0.00% optimize.symbol_engine_optimizer.build : 0.000198s : 0.01% optimize.symbol_engine_optimizer.elim_shapecalc : 0.000023s : 0.00% optimize.symbol_engine_optimizer.elim_not_effective : 0.000031s : 0.00% optimize.symbol_engine_optimizer.opt_reshape : 0.000016s : 0.00% optimize.symbol_engine_optimizer.fold_const_symbol : 0.000040s : 0.00% optimize.symbol_engine_optimizer.renormalize : 0.000001s : 0.00% detach_backward : 0.000003s : 0.00% pipeline_parallel_scheduler : 0.000002s : 0.00% auto_monad_reorder : 0.000033s : 0.00% get_jit_bprop_graph : 0.000003s : 0.00% rewriter_after_jit_bprop_graph : 0.000006s : 0.00% opt_after_jit_grad : 0.000866s : 0.05% validate : 0.000094s : 0.01% Time group info: ------[substitution.] 0.000650 111 7.56% : 0.000049s : 6: substitution.arithmetic_simplify 1.05% : 0.000007s : 6: substitution.elim_not_effective 2.13% : 0.000014s : 6: substitution.float_tuple_getitem_switch 3.22% : 0.000021s : 6: substitution.fold_const_symbol 2.29% : 0.000015s : 8: substitution.graph_param_transform 28.72% : 0.000187s : 5: substitution.inline 1.22% : 0.000008s : 12: substitution.j_node_and_user_rematch 1.76% : 0.000011s : 4: substitution.minmaximum_grad 1.77% : 0.000011s : 12: substitution.remove_not_recompute_node 0.82% : 0.000005s : 2: substitution.replace_old_param 6.36% : 0.000041s : 8: substitution.tuple_list_convert_item_index_to_positive 2.58% : 0.000017s : 8: substitution.tuple_list_get_item_const_eliminator 26.94% : 0.000175s : 8: substitution.tuple_list_get_item_depend_reorder 9.62% : 0.000063s : 12: substitution.tuple_list_get_item_eliminator 3.97% : 0.000026s : 8: substitution.tuple_list_get_set_item_eliminator ------[type_inference.] 1.637754 2 99.77% : 1.633968s : 1: type_inference.infer 0.23% : 0.003786s : 1: type_inference.specialize ------[replace.] 0.000047 5 100.00% : 0.000047s : 5: replace.inline ------[match.] 0.000183 5 100.00% : 0.000183s : 5: match.inline ------[predicate.] 0.000415 2113 0.98% : 0.000004s : 21: predicate.accumulaten_eliminater 1.03% : 0.000004s : 8: predicate.ad_related_special_op_eliminate 0.63% : 0.000003s : 16: predicate.addn_check_dump 0.84% : 0.000003s : 21: predicate.addn_zero_filter 0.84% : 0.000003s : 21: predicate.adjust_all_reduce_mul_add 2.29% : 0.000009s : 37: predicate.arithmetic_simplify 0.92% : 0.000004s : 21: predicate.cast_eliminate 0.68% : 0.000003s : 16: predicate.check_bprop_eliminate 0.66% : 0.000003s : 16: predicate.compare_switch_simplify 0.20% : 0.000001s : 8: predicate.const_output_eliminate 0.64% : 0.000003s : 16: predicate.depend_value_elim 0.86% : 0.000004s : 21: predicate.dict_get_item_const_eliminator 1.08% : 0.000004s : 21: predicate.dict_get_item_eliminator 0.86% : 0.000004s : 21: predicate.dict_set_item_eliminator 1.32% : 0.000005s : 16: predicate.dumpgradient_eliminate 0.24% : 0.000001s : 8: predicate.elim_not_effective 0.50% : 0.000002s : 8: predicate.elim_shapecalc_of_broadcastargs 1.26% : 0.000005s : 29: predicate.environ_add_const_eliminate 1.11% : 0.000005s : 29: predicate.environ_get_add_eliminate 1.14% : 0.000005s : 29: predicate.environ_get_depend_swap 1.86% : 0.000008s : 45: predicate.environ_get_eliminate 1.17% : 0.000005s : 29: predicate.environ_get_set_eliminate 1.01% : 0.000004s : 26: predicate.exchange_switch_depend_value 1.74% : 0.000007s : 26: predicate.float_depend_g_call 0.67% : 0.000003s : 16: predicate.float_environ_get_switch 1.05% : 0.000004s : 24: predicate.float_tuple_getitem_switch 0.20% : 0.000001s : 8: predicate.fold_const_symbol 0.71% : 0.000003s : 16: predicate.get_grad_eliminate 0.29% : 0.000001s : 8: predicate.graph_param_transform 0.67% : 0.000003s : 16: predicate.incorporate_call 0.62% : 0.000003s : 16: predicate.incorporate_call_switch 5.65% : 0.000023s : 95: predicate.inline 0.95% : 0.000004s : 16: predicate.inline_without_move 0.34% : 0.000001s : 16: predicate.j_node_and_user_rematch 0.93% : 0.000004s : 16: predicate.less_batch_normalization 1.68% : 0.000007s : 37: predicate.list_to_tuple_eliminator_ 2.33% : 0.000010s : 58: predicate.load_eliminater 1.21% : 0.000005s : 8: predicate.loop_unroll_after_grad 1.58% : 0.000007s : 36: predicate.loop_unroll_before_grad 1.71% : 0.000007s : 37: predicate.make_slice_get_slice_eliminator 0.78% : 0.000003s : 16: predicate.merge_addn 0.68% : 0.000003s : 16: predicate.micro_step_allgather_replace 0.67% : 0.000003s : 16: predicate.mini_step_allgather_replace 0.84% : 0.000003s : 21: predicate.minmaximum_grad 1.18% : 0.000005s : 8: predicate.mutable_eliminate 0.42% : 0.000002s : 8: predicate.opt_reshape 0.48% : 0.000002s : 8: predicate.parallel_virtual_node 1.38% : 0.000006s : 26: predicate.partial_defer_inline 1.25% : 0.000005s : 29: predicate.partial_eliminate 0.84% : 0.000003s : 21: predicate.print_const_string_wrapper 0.70% : 0.000003s : 16: predicate.reduce_all_const_elim 1.29% : 0.000005s : 21: predicate.reduce_eliminate 2.34% : 0.000010s : 58: predicate.redundant_stop_gradient_eliminater 0.38% : 0.000002s : 16: predicate.remove_not_recompute_node 1.24% : 0.000005s : 37: predicate.replace_applicator 0.46% : 0.000002s : 16: predicate.replace_old_param 0.28% : 0.000001s : 8: predicate.reset_defer_inline 0.91% : 0.000004s : 21: predicate.reshape_eliminate 0.76% : 0.000003s : 16: predicate.row_tensor_add_zeros_like 0.49% : 0.000002s : 8: predicate.row_tensor_eliminate 1.04% : 0.000004s : 16: predicate.same_eliminate 0.44% : 0.000002s : 16: predicate.set_cell_output_no_recompute 0.87% : 0.000004s : 16: predicate.shard_identity_eliminate 0.92% : 0.000004s : 16: predicate.special_op_eliminate 0.80% : 0.000003s : 16: predicate.specialize_transform 0.91% : 0.000004s : 16: predicate.split_environ_get_set_with_tuple_value 0.96% : 0.000004s : 16: predicate.stack_unstack_eliminate 0.41% : 0.000002s : 8: predicate.switch_call_monad_eliminater 1.12% : 0.000005s : 26: predicate.switch_defer_inline 1.73% : 0.000007s : 42: predicate.switch_layer_defer_inline 3.99% : 0.000017s : 86: predicate.switch_simplify 0.87% : 0.000004s : 21: predicate.tile_eliminate 0.86% : 0.000004s : 21: predicate.transpose_eliminate 1.86% : 0.000008s : 37: predicate.tuple_list_convert_item_index_to_positive 2.03% : 0.000008s : 37: predicate.tuple_list_get_item_const_eliminator 2.12% : 0.000009s : 37: predicate.tuple_list_get_item_depend_reorder 3.33% : 0.000014s : 53: predicate.tuple_list_get_item_eliminator 1.79% : 0.000007s : 37: predicate.tuple_list_get_set_item_eliminator 2.57% : 0.000011s : 53: predicate.tuple_list_set_item_eliminator 1.61% : 0.000007s : 37: predicate.tuple_to_list_eliminator_ 2.29% : 0.000010s : 58: predicate.updatestate_pure_node_eliminater 2.96% : 0.000012s : 74: predicate.updatestate_useless_node_eliminater 0.41% : 0.000002s : 8: predicate.value_based_eliminate 0.77% : 0.000003s : 16: predicate.virtual_dataset_eliminate 0.74% : 0.000003s : 16: predicate.virtual_output_eliminate 0.31% : 0.000001s : 8: predicate.virtual_view_grad_eliminate 0.47% : 0.000002s : 8: predicate.zero_like_fill_zero ------[func_graph_cloner_run.] 0.007154 32 63.09% : 0.004513s : 25: func_graph_cloner_run.FuncGraphClonerGraph 36.91% : 0.002641s : 7: func_graph_cloner_run.FuncGraphSpecializer ------[meta_graph.] 0.000000 0 ------[manager.] 0.000000 0 ------[pynative] 0.000000 0 ------[others.] 1.961563 192 0.00% : 0.000004s : 1: ForceFp32Comm 0.31% : 0.006088s : 1: add_attr 0.31% : 0.006071s : 1: add_attr_with_inline 0.00% : 0.000004s : 1: add_comm_op_reuse_tag 0.01% : 0.000127s : 1: add_recomputation 0.00% : 0.000006s : 1: assign_add_opt 0.01% : 0.000099s : 1: auto_monad 0.00% : 0.000038s : 1: auto_monad_reorder 0.00% : 0.000005s : 1: begin_end_overlap_inline 0.00% : 0.000007s : 1: bias_add_comm_swap 0.07% : 0.001400s : 1: bootstrap 0.00% : 0.000048s : 1: cconv 0.00% : 0.000004s : 1: comm_op_add_attrs 0.00% : 0.000031s : 1: control_data_broadcast_order 0.00% : 0.000021s : 1: convert_after_rewriter 0.00% : 0.000061s : 1: cse_after_recomputation 0.00% : 0.000006s : 1: dataset_repeat_opt 0.00% : 0.000008s : 1: detach_backward 0.00% : 0.000022s : 1: environ_conv 0.00% : 0.000030s : 1: event_method 0.00% : 0.000007s : 1: full_micro_interleaved_order_control 0.00% : 0.000006s : 1: get_jit_bprop_graph 0.00% : 0.000011s : 1: graph_reusing 0.00% : 0.000005s : 1: grouped_pairwise_exchange_alltoall 0.00% : 0.000006s : 1: handle_group_info 0.00% : 0.000006s : 1: inline 0.00% : 0.000006s : 1: insert-virtual-dataset 0.00% : 0.000004s : 1: interleave_parallel_branches 0.00% : 0.000005s : 1: interleave_split_concat_branches 0.00% : 0.000006s : 1: label_fine_grained_interleaved_index 0.00% : 0.000013s : 1: label_micro_interleaved_index 0.03% : 0.000662s : 1: loop_unroll 0.00% : 0.000006s : 1: merge_cast_opt 0.00% : 0.000005s : 1: micro_interleaved_order_control 0.04% : 0.000872s : 1: mutable_eliminate 0.00% : 0.000012s : 1: offloading_packed_experts 0.00% : 0.000028s : 1: opt.transform.loop_unroll_optimizer 0.00% : 0.000035s : 1: opt.transform.mutable_eliminate 0.13% : 0.002506s : 78: opt.transform.opt_a 0.00% : 0.000064s : 1: opt.transform.opt_after_cconv 0.00% : 0.000056s : 1: opt.transform.opt_after_jit_grad 0.02% : 0.000310s : 28: opt.transform.opt_b 0.03% : 0.000651s : 2: opt.transform.opt_trans_graph 0.01% : 0.000104s : 4: opt.transform.symbol_engine_opt 5.10% : 0.100037s : 1: opt_a 0.01% : 0.000197s : 1: opt_after_cconv 0.05% : 0.000884s : 1: opt_after_jit_grad 0.03% : 0.000510s : 1: opt_b 5.34% : 0.104658s : 1: optimize 0.00% : 0.000036s : 1: optimize_parallel_all_gather_comm 0.00% : 0.000014s : 1: order_py_execute_after_rewriter 0.00% : 0.000038s : 1: overlap_grad_flash_sp 0.00% : 0.000004s : 1: overlap_grad_matmul_and_grad_allreduce 0.00% : 0.000011s : 1: overlap_grad_ring_attention 0.00% : 0.000006s : 1: overlap_opt_shard_grad_in_pipeline 0.00% : 0.000004s : 1: overlap_opt_shard_in_pipeline 0.00% : 0.000007s : 1: overlap_param_gather 0.00% : 0.000004s : 1: overlap_recompute_allgather_and_fa_grad 0.00% : 0.000011s : 1: overlap_recompute_and_grad_model_parallel 0.00% : 0.000006s : 1: overlap_recompute_comm 0.00% : 0.000008s : 1: parallel-infer-symbol 0.00% : 0.000004s : 1: parallel-infer-symbol-second 0.00% : 0.000007s : 1: partial_unused_args_eliminate 0.00% : 0.000005s : 1: pipeline_parallel_scheduler 0.00% : 0.000005s : 1: pipeline_split 0.00% : 0.000046s : 1: pre_auto_parallel 0.00% : 0.000033s : 1: py_interpret_to_execute 0.00% : 0.000031s : 1: py_interpret_to_execute_after_opt_a 0.00% : 0.000004s : 1: remove_cast_before_assign_add 0.01% : 0.000139s : 1: remove_dup_value 4.74% : 0.092984s : 1: renormalize.infer 0.15% : 0.002988s : 1: renormalize.specialize 0.00% : 0.000006s : 1: reorder_send_recv_between_fp_bp 0.00% : 0.000010s : 1: rewriter_after_jit_bprop_graph 0.02% : 0.000322s : 1: rewriter_after_opt_a 0.00% : 0.000096s : 1: rewriter_before_opt_a 0.00% : 0.000005s : 1: slice_cell_reuse_recomputed_activation 0.00% : 0.000005s : 1: slice_recompute_activation 0.00% : 0.000005s : 1: split_layernorm_comm 0.00% : 0.000005s : 1: split_matmul_comm_elemetwise 0.00% : 0.000014s : 1: swap_dp_allreduce_reducescatter 0.02% : 0.000360s : 1: symbol_engine_optimizer 0.04% : 0.000716s : 1: tuple_transform 83.50% : 1.637876s : 1: type_inference . [hook] pytest_runtest_teardown:test_swiglu[True-float16--1-shape1] tests/st/infer/ops/test_internal_ops/test_swiglu_v2.py::test_swiglu[True-float16--1-shape1],max_mem:100.0M =============================== warnings summary =============================== ../../../../../../../../../../usr/local/Ascend/cann-8.5.0/python/site-packages/tbe/dsl/classifier/transdata/transdata_classifier.py:222 /usr/local/Ascend/cann-8.5.0/python/site-packages/tbe/dsl/classifier/transdata/transdata_classifier.py:222: DeprecationWarning: invalid escape sequence \B """ ../../../../../../../../../../usr/local/Ascend/cann-8.5.0/python/site-packages/tbe/dsl/unify_schedule/vector/transdata/common/graph/transdata_graph_info.py:143 /usr/local/Ascend/cann-8.5.0/python/site-packages/tbe/dsl/unify_schedule/vector/transdata/common/graph/transdata_graph_info.py:143: DeprecationWarning: invalid escape sequence \c """ ../../../../../../../../../../usr/local/Ascend/cann-8.5.0/python/site-packages/tbe/dsl/unify_schedule/vector/transdata/common/graph/transdata_graph_info.py:170 /usr/local/Ascend/cann-8.5.0/python/site-packages/tbe/dsl/unify_schedule/vector/transdata/common/graph/transdata_graph_info.py:170: DeprecationWarning: invalid escape sequence \c """ ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:549 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero. setattr(self, word, getattr(machar, word).flat[0]) ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:89 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero. return self._float_to_str(self.smallest_subnormal) ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:549 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero. setattr(self, word, getattr(machar, word).flat[0]) ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:89 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero. return self._float_to_str(self.smallest_subnormal) ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py:57: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py:56: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("batchnorm_fold2_grad_reduce") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:51: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py:143: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("correction_mul_grad_reduce") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py:92: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py:49: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d_reduce") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py:91: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py:48: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d_reduce") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py:52: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_perchannel_grad") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py:54: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py:81: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("fake_quant_per_layer_grad") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perchannel") ../../../../../../../../anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50 /home/jenkins/anaconda3/envs/ci39/lib/python3.9/site-packages/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py:50: DeprecationWarning: te_fusion.fusion_manager.fusion_manager.register is deprecated,please replace it with tbe.common.register.register_op_compute @fusion_manager.register("minmax_update_perlayer") -- Docs: https://docs.pytest.org/en/stable/warnings.html ================== 8 passed, 25 warnings in 93.42s (0:01:33) ===================