Coverage for hyper_parallel / platform / torch / fully_shard / async_grad_hook.py: 0%

10 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-03-01 07:33 +0800

1# Copyright 2025 Huawei Technologies Co., Ltd 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================ 

15"""Torch HSDP async gradient hook""" 

16from hyper_parallel.core.fully_shard.hsdp_async_grad_hook import HSDPAsyncGradHook 

17 

18 

class TorchHSDPAsyncGradHook(HSDPAsyncGradHook):
    """
    PyTorch-specific async gradient hook for hybrid sharded data parallel (HSDP) training.

    Specializes the framework-agnostic HSDPAsyncGradHook by adapting its final
    hook chain to PyTorch tensor semantics: the processed gradient is written
    back into the incoming gradient tensor in place, and the parameter's stale
    ``.grad`` reference is dropped.
    """

    def _get_final_async_grad_hook(self, param, async_hook, post_hook=None):
        """
        Build the tensor-level hook that finalizes async gradient processing.

        Args:
            param: Parameter tensor whose gradient the hook manages.
            async_hook: Async gradient hook function to run on each gradient.
            post_hook: Optional post-processing hook. Defaults to None.

        Returns:
            function: A hook taking a gradient tensor; it runs the base hook
            chain, stores the result in-place on the gradient, clears
            ``param.grad``, and returns the gradient tensor.
        """
        # Framework-agnostic chain assembled by the base class.
        base_hook = super()._get_final_async_grad_hook(param, async_hook, post_hook)

        def torch_grad_hook(grad):
            # Run the base chain, then write the processed result back into
            # the incoming gradient tensor in place.
            processed = base_hook(grad)
            grad.data = processed
            # Drop the stale reference so only the freshly processed
            # gradient stays alive on the parameter.
            param.grad = None
            return grad

        return torch_grad_hook