Coverage for hyper_parallel/platform/mindspore/hsdp/grad_hook.py: 74%
31 statements
coverage.py v7.13.1, created at 2026-03-01 07:33 +0800
# Copyright 2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""HSDP gradient hook"""
from mindspore import ops, jit_class
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor

from hyper_parallel.core.hsdp.hsdp_grad_hook import HSDPGradHook

@jit_class
class MindSporeHSDPGradHook(HSDPGradHook):
    """MindSpore HSDP gradient hook"""

    def __init__(self, config, platform):
        """Initialize the hook from the HSDP config and the platform adapter."""
        super().__init__(config, platform)
        if not self.use_eager_hook:
            # In graph mode the flag lives in a Parameter so it can be updated
            # in place with ops.assign inside the compiled graph.
            self.requires_grad_sync = Parameter(
                Tensor(False), name="hsdp_requires_grad_sync", requires_grad=False
            )

    def _cast_hook(self, hook, grad):
        """Cast the gradient to reduce_dtype before the reduce hook and cast it back afterwards."""
        if self.reduce_dtype is None:
            return hook(grad)
        origin_dtype = ops.dtype(grad)
        grad_cast = ops.cast(grad, self.reduce_dtype)
        output = hook(grad_cast)
        # Restore the original dtype so downstream consumers see an unchanged gradient dtype.
        output = ops.cast(output, origin_dtype)
        return output

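    # Illustrative sketch only (not part of the original file): assuming a bf16
    # training setup with reduce_dtype set to float32, _cast_hook would wrap a
    # hypothetical communication hook roughly like this:
    #
    #     wrapped = lambda grad: self._cast_hook(reduce_hook, grad)
    #     # grad (bf16) -> cast to fp32 -> reduce_hook runs in fp32 -> cast back to bf16
    #
    # `reduce_hook` and the dtypes here are assumptions for illustration, not
    # something this module defines.
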
    def _get_final_grad_hook(self, param, grad_hook, no_cast=False):
        """Add cast and scale to the grad hook; in eager mode also store the result on param.grad."""
        final_hook = super()._get_final_grad_hook(param, grad_hook, no_cast)

        def set_grad_hook(grad):
            grad = final_hook(grad)
            param.grad = grad
            return grad

        if self.use_eager_hook:
            return set_grad_hook
        return final_hook

    def set_requires_grad_sync(self, requires_grad_sync):
        """Set the requires-grad-sync flag that controls gradient synchronization."""
        if self.use_eager_hook:
            self.requires_grad_sync = requires_grad_sync
        else:
            # Graph mode: update the Parameter in place so the new value is
            # visible inside the compiled graph.
            ops.assign(self.requires_grad_sync, Tensor(requires_grad_sync))
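
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only; not executed by this module). `config`,
# `platform`, `param`, and `reduce_hook` are hypothetical stand-ins for the
# objects the HSDP core would normally provide; only the MindSporeHSDPGradHook
# calls come from this file.
#
#     hook = MindSporeHSDPGradHook(config, platform)
#     final_hook = hook._get_final_grad_hook(param, reduce_hook)
#     hook.set_requires_grad_sync(False)  # e.g. skip sync while accumulating gradients
#     hook.set_requires_grad_sync(True)   # re-enable sync for the boundary step
# ---------------------------------------------------------------------------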