Coverage for hyper_parallel / platform / torch / fully_shard / grad_hook.py: 0%

10 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-03-01 07:33 +0800

# Copyright 2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Torch HSDP gradient hook"""
from hyper_parallel.core.fully_shard.hsdp_grad_hook import HSDPGradHook

class TorchHSDPGradHook(HSDPGradHook):
    """
    Torch HSDP gradient hook for hybrid sharded data parallel training.

    Extends the base HSDPGradHook with the PyTorch-specific final hook,
    covering gradient casting, scaling and parameter gradient management.
    """

    def _get_final_grad_hook(self, param, grad_hook, no_cast=False):
        """
        Create a final gradient hook that adds casting and scaling operations.

        Args:
            param: The parameter tensor to apply the gradient hook to.
            grad_hook: The base gradient hook function.
            no_cast (bool): Whether to skip gradient casting operations,
                defaults to False.

        Returns:
            function: A gradient hook that runs the base cast/scale pipeline,
            writes the result back into the incoming gradient tensor in place,
            and clears ``param.grad``.
        """
        # Base class assembles the cast/scale processing chain around grad_hook.
        processed = super()._get_final_grad_hook(param, grad_hook, no_cast)

        def _install_grad(grad):
            # Overwrite the incoming gradient's storage with the processed
            # result, then drop param.grad so the stale reference is released.
            grad.data = processed(grad)
            param.grad = None
            return grad

        return _install_grad