Diff Coverage

Diff: origin/master...HEAD, staged and unstaged changes

| Source File | Diff Coverage (%) | Missing Lines |
|---|---|---|
| hyper_parallel/auto_parallel/sapp_nd/memory_estimation/_backbone.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/memory_estimation/_context.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/memory_estimation/_hook_manager.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/memory_estimation/evaluators/body.py | 96.2% | 127 |
| hyper_parallel/auto_parallel/sapp_nd/memory_estimation/evaluators/comm.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/memory_estimation/evaluators/layer_block.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/nd/common/_cost_model_variables.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/nd/common/cost_model_preprocess.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/nd/common/framework_parsers/_cost_model_parser.py | 100% | |
| hyper_parallel/auto_parallel/sapp_nd/nd/common/framework_parsers/cost_model_parser_mindformers.py | 80.0% | 274 |

hyper_parallel/auto_parallel/sapp_nd/memory_estimation/evaluators/body.py (excerpt: lines 123–131; line 127 is not covered)
    def fullrec_layer_activ_gradclip(
        ccfg: CostModelConfig, ctx: Context
    ) -> float:
        """special case with gradient clipping"""
        non_exp_p, routed_p, shared_p = ctx.eval.num_p(ccfg, ctx)
        grad_clip_mem = (
            non_exp_p
            + routed_p / ccfg.ep * ccfg.bytes_os / ccfg.shard_p_os_exp
            + shared_p * ccfg.bytes_os / ccfg.shard_p_os_exp_partial
hyper_parallel/auto_parallel/sapp_nd/nd/common/framework_parsers/cost_model_parser_mindformers.py (excerpt: lines 270–278; line 274 is not covered)
            self.ccfg.os_max_shard = self.ccfg.op_weight_shard
        elif self.ccfg.has_op:
            self.ccfg.os_max_shard = self.ccfg.d * self.ccfg.t
        else:
            self.ccfg.os_max_shard = 1
        self.config_optimizer_shard(self.ccfg)

        # Other factors
        self.config_shard_emb()