Coverage for hyper_parallel/platform/mindspore/hsdp/grad_hook.py: 74%

31 statements  

coverage.py v7.13.1, created at 2026-03-01 07:33 +0800

# Copyright 2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""HSDP gradient hook"""
from mindspore import ops, jit_class
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from hyper_parallel.core.hsdp.hsdp_grad_hook import HSDPGradHook


@jit_class
class MindSporeHSDPGradHook(HSDPGradHook):
    """MindSpore HSDP gradient hook"""

    def __init__(self, config, platform):
        """init"""
        super().__init__(config, platform)
        if not self.use_eager_hook:
            # In graph mode the sync flag must live in a Parameter so it can
            # be flipped in place with ops.assign inside the compiled graph.
            self.requires_grad_sync = Parameter(Tensor(False), name="hsdp_requires_grad_sync", requires_grad=False)

    def _cast_hook(self, hook, grad):
        """add cast before and after reduce hook"""
        if self.reduce_dtype is None:
            return hook(grad)
        # Run the reduce hook in reduce_dtype, then restore the original dtype.
        origin_dtype = ops.dtype(grad)
        grad_cast = ops.cast(grad, self.reduce_dtype)
        output = hook(grad_cast)
        output = ops.cast(output, origin_dtype)
        return output

    def _get_final_grad_hook(self, param, grad_hook, no_cast=False):
        """add cast and scale grad"""
        final_hook = super()._get_final_grad_hook(param, grad_hook, no_cast)

        def set_grad_hook(grad):
            grad = final_hook(grad)
            # Eager hooks also publish the processed gradient on the parameter.
            param.grad = grad
            return grad

        if self.use_eager_hook:
            return set_grad_hook
        return final_hook

    def set_requires_grad_sync(self, requires_grad_sync):
        """set requires grad sync flag to control gradient sync."""
        if self.use_eager_hook:
            # Eager mode: a plain Python attribute is enough.
            self.requires_grad_sync = requires_grad_sync
        else:
            # Graph mode: update the Parameter in place so compiled graphs see it.
            ops.assign(self.requires_grad_sync, Tensor(requires_grad_sync))
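
Below is a minimal, self-contained sketch (not part of the file above, and not counted in its coverage) of the two patterns the hook relies on: casting the gradient around the reduce call, and toggling a graph-mode-safe sync flag in place with ops.assign. The reduce_hook here is a hypothetical stand-in for the real HSDP communication hook.

import mindspore as ms
from mindspore import ops, Tensor
from mindspore.common.parameter import Parameter


def reduce_hook(grad):
    # Hypothetical stand-in: a real hook would all-reduce / reduce-scatter
    # the gradient across the data-parallel group.
    return grad * 0.5


def cast_around_reduce(grad, reduce_dtype=ms.float32):
    # Same shape as MindSporeHSDPGradHook._cast_hook: upcast so the reduce
    # accumulates in a wider dtype, then restore the parameter's dtype.
    if reduce_dtype is None:
        return reduce_hook(grad)
    origin_dtype = ops.dtype(grad)
    output = reduce_hook(ops.cast(grad, reduce_dtype))
    return ops.cast(output, origin_dtype)


grad = Tensor([2.0, 4.0], ms.float16)
print(cast_around_reduce(grad).dtype)  # Float16; the reduce itself ran in fp32

# Graph-mode flag toggle: a plain Python bool cannot be flipped inside a
# compiled graph, so the flag lives in a Parameter and is updated in place.
flag = Parameter(Tensor(False), name="requires_grad_sync", requires_grad=False)
ops.assign(flag, Tensor(True))
print(flag.value())  # True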