Coverage for hyper_parallel/platform/mindspore/hsdp/grad_hook.py: 74%
31 statements
coverage.py v7.13.1, created at 2026-03-01 07:33 +0800
# Copyright 2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""HSDP gradient hook"""
from mindspore import ops, jit_class
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor

from hyper_parallel.core.hsdp.hsdp_grad_hook import HSDPGradHook

@jit_class
class MindSporeHSDPGradHook(HSDPGradHook):
    """MindSpore HSDP gradient hook"""

    def __init__(self, config, platform):
        """Initialize the hook from the HSDP config and the platform adapter."""
        super().__init__(config, platform)
        if not self.use_eager_hook:
            # In graph mode the flag lives in a Parameter so it can be updated
            # in place with ops.assign inside the compiled graph.
            self.requires_grad_sync = Parameter(
                Tensor(False), name="hsdp_requires_grad_sync", requires_grad=False
            )

    def _cast_hook(self, hook, grad):
        """Cast the gradient to reduce_dtype before the reduce hook and cast it back afterwards."""
        if self.reduce_dtype is None:
            return hook(grad)
        origin_dtype = ops.dtype(grad)
        grad_cast = ops.cast(grad, self.reduce_dtype)
        output = hook(grad_cast)
        # Restore the original dtype so downstream consumers see an unchanged gradient dtype.
        output = ops.cast(output, origin_dtype)
        return output

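    # Illustrative sketch only (not part of the original file): assuming a bf16
    # training setup with reduce_dtype set to float32, _cast_hook would wrap a
    # hypothetical communication hook roughly like this:
    #
    #     wrapped = lambda grad: self._cast_hook(reduce_hook, grad)
    #     # grad (bf16) -> cast to fp32 -> reduce_hook runs in fp32 -> cast back to bf16
    #
    # `reduce_hook` and the dtypes here are assumptions for illustration, not
    # something this module defines.
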
    def _get_final_grad_hook(self, param, grad_hook, no_cast=False):
        """Add cast and scale to the grad hook; in eager mode also store the result on param.grad."""
        final_hook = super()._get_final_grad_hook(param, grad_hook, no_cast)

        def set_grad_hook(grad):
            grad = final_hook(grad)
            param.grad = grad
            return grad

        if self.use_eager_hook:
            return set_grad_hook
        return final_hook

    def set_requires_grad_sync(self, requires_grad_sync):
        """Set the requires-grad-sync flag that controls gradient synchronization."""
        if self.use_eager_hook:
            self.requires_grad_sync = requires_grad_sync
        else:
            # Graph mode: update the Parameter in place so the new value is
            # visible inside the compiled graph.
            ops.assign(self.requires_grad_sync, Tensor(requires_grad_sync))
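
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only; not executed by this module). `config`,
# `platform`, `param`, and `reduce_hook` are hypothetical stand-ins for the
# objects the HSDP core would normally provide; only the MindSporeHSDPGradHook
# calls come from this file.
#
#     hook = MindSporeHSDPGradHook(config, platform)
#     final_hook = hook._get_final_grad_hook(param, reduce_hook)
#     hook.set_requires_grad_sync(False)  # e.g. skip sync while accumulating gradients
#     hook.set_requires_grad_sync(True)   # re-enable sync for the boundary step
# ---------------------------------------------------------------------------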