Coverage for hyper_parallel/core/shard/ops/parallel_activation_with_axis.py

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ============================================================================

15"""

16Activation with axis distributed operator implementation.

17"""

19from .parallel_ops import DistributedOp

class ActivationWithAxisDistributedOp(DistributedOp):
    """
    Distributed operator for activations that take an axis argument (e.g., softmax).

    Builds on DistributedOp and adds layout inference that rejects any input
    layout in which a requested axis is split across more than one slice.
    """

    def infer_layout(self, layouts, extra_args):
        """
        Infer the output layout for an activation-with-axis operation.

        Every non-None input layout must compare equal to the first non-None
        one; that shared layout is returned as the output layout.

        Args:
            layouts: Layouts of the inputs. None entries are ignored by the
                equality comparison, but check_layout reads ``layouts[0]``,
                so the first entry has to be a real layout object.
            extra_args: Extra operator arguments, forwarded unchanged to
                check_layout (its first element holds the axis or axes).

        Returns:
            The first non-None entry of ``layouts``, or None when ``layouts``
            is empty.

        Raises:
            ValueError: If two non-None layouts differ, or if check_layout
                rejects the axis argument or finds a sharded axis.
        """
        if not layouts:
            return None

        # NOTE(review): _allow_partial_inputs and _check_partial_inputs are not
        # defined in this class (presumably inherited from DistributedOp); the
        # check is expected to reject inputs with partial status - confirm.
        if not self._allow_partial_inputs:
            self._check_partial_inputs(layouts)

        # Axis validation runs before the layouts are compared to each other.
        self.check_layout(layouts, extra_args)

        # Compare every present layout against the first present one.
        present = [item for item in layouts if item is not None]
        reference = present[0] if present else None
        for candidate in present:
            if candidate != reference:
                raise ValueError(
                    f"Operation {self.op_name} requires all tensor inputs to have the same layout. "
                    f"Input a: {reference}, Input b: {candidate}")
        return reference

    def check_layout(self, layouts, extra_args):
        """
        Verify that none of the requested axes of the first input is sharded.

        Args:
            layouts: Input layouts; only ``layouts[0]`` is inspected, through
                the ``"tensor_map"`` and ``"mesh_shape"`` entries of its
                ``to_dict()`` result.
            extra_args: Sequence whose first element is the axis (int) or
                axes (tuple of int) the operation works along.

        Raises:
            ValueError: If the first element of ``extra_args`` is neither an
                int nor a tuple, or if a requested axis maps to a mesh
                dimension whose size is not 1.
        """
        layout_info = layouts[0].to_dict()
        dim_to_mesh = layout_info["tensor_map"]

        axes = extra_args[0]
        if isinstance(axes, int):
            # Treat a single axis as a one-element tuple.
            axes = (axes,)
        elif not isinstance(axes, tuple):
            raise ValueError(
                f"Operation {self.op_name}: The extra args should be int or tuple, but got ({type(axes)})")

        unsharded = 1
        for axis in axes:
            mesh_pos = dim_to_mesh[axis]
            # A tensor_map entry of -1 means this dimension is not mapped to
            # any mesh dimension, so there is nothing to verify.
            if mesh_pos != -1:
                mesh_shape = layout_info["mesh_shape"]
                # tensor_map values index mesh_shape counting from its end.
                slices = mesh_shape[len(mesh_shape) - mesh_pos - 1]
                if slices != unsharded:
                    raise ValueError(
                        f"Operation {self.op_name}: The axis dimension (in dim {axis}) is sharded "
                        f"(strategy is {slices}). This operation requires the reduction axis to be un-sharded.")

Coverage for hyper_parallel / core / shard / ops / parallel_activation_with_axis.py: 90%

30 statements