# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""pipeline parallel utils"""
import io
import pickle

from mindspore import nn, Tensor, mint, ops
from mindspore.common import dtype as mstype
from mindspore.communication import GlobalComm
from mindspore.mint.distributed.distributed import _object_to_tensor, send, recv

import hyper_parallel
from hyper_parallel.core.shard.custom_shard import custom_shard


class _MicroBatch(nn.Cell):
    """
    Split inputs into micro-batches in pipeline parallel.

    Args:
        micro_batch_num (int): The number of micro-batches.
        args_batch_dim (list, optional): Specifies the batch dim of each positional arg.
            Default: ``None``.
        kwargs_batch_dim (dict, optional): Specifies the batch dim of each keyword arg.
            Default: ``None``.

    Inputs:
        - **args** (list) - Input args.
        - **kwargs** (dict) - Input kwargs.

    Outputs:
        - **args_after_split** (list) - Input args after being split into micro-batches.
        - **kwargs_after_split** (list) - Input kwargs after being split into micro-batches.
    """

    def __init__(self, micro_batch_num, args_batch_dim=None, kwargs_batch_dim=None):
        super().__init__()
        self.micro_batch_num = micro_batch_num
        self.args_batch_dim = args_batch_dim
        self.kwargs_batch_dim = kwargs_batch_dim

    def construct(self, args, kwargs):
        """Split args and kwargs into micro_batch_num micro-batches."""
        args_after_split = []
        kwargs_after_split = []
        for micro_idx in range(self.micro_batch_num):
            micro_args = []
            micro_kwargs = {}
            for arg_idx, cur_arg in enumerate(args):
                cur_arg_batch_dim = 0
                if self.args_batch_dim and self.args_batch_dim[arg_idx] is not None:
                    cur_arg_batch_dim = self.args_batch_dim[arg_idx].batch_dim
                micro_arg = self.split_inputs_with_custom_shard(cur_arg, cur_arg_batch_dim, micro_idx)
                micro_args.append(micro_arg)
            args_after_split.append(micro_args)

            for key, cur_kwarg in kwargs.items():
                cur_kwarg_batch_dim = 0
                # Mirror the positional-arg handling: fall back to batch dim 0 when
                # no batch dim is configured for this keyword argument.
                if self.kwargs_batch_dim and key in self.kwargs_batch_dim and self.kwargs_batch_dim[key] is not None:
                    cur_kwarg_batch_dim = self.kwargs_batch_dim[key].batch_dim
                micro_kwarg = self.split_inputs_with_custom_shard(cur_kwarg, cur_kwarg_batch_dim, micro_idx)
                micro_kwargs[key] = micro_kwarg
            kwargs_after_split.append(micro_kwargs)
        return args_after_split, kwargs_after_split

    def split_inputs_with_custom_shard(self, input_tensor, cur_arg_batch_dim, micro_idx):
        """Run split_inputs under custom_shard with the input's mesh and placements."""
        if not isinstance(input_tensor, hyper_parallel.DTensor):
            raise TypeError(f"Input type {type(input_tensor)} is not DTensor.")
        input_layout = input_tensor.layout
        func_wrap = custom_shard(self.split_inputs,
                                 device_mesh=input_layout.mesh,
                                 out_placements=(input_layout.placements,),
                                 in_placements=(input_layout.placements, None, None))
        return func_wrap(input_tensor, cur_arg_batch_dim, micro_idx)

    def split_inputs(self, input_tensor, cur_arg_batch_dim, micro_idx):
        """Slice the micro_idx-th micro-batch out of input_tensor along cur_arg_batch_dim."""
        if cur_arg_batch_dim == -1:
            # A batch dim of -1 marks an input that should not be split.
            return input_tensor
        # Assumes the batch dim is divisible by micro_batch_num; any remainder
        # would be dropped by the slicing below.
        batch_dim_shape = input_tensor.shape[cur_arg_batch_dim]
        micro_batch_begin = (batch_dim_shape // self.micro_batch_num) * micro_idx
        micro_batch_end = (batch_dim_shape // self.micro_batch_num) * (micro_idx + 1)
        strided_slice_begin = [0] * input_tensor.ndim
        strided_slice_strides = [1] * input_tensor.ndim
        strided_slice_end = list(input_tensor.shape)
        strided_slice_begin[cur_arg_batch_dim] = micro_batch_begin
        strided_slice_end[cur_arg_batch_dim] = micro_batch_end
        micro_input = ops.strided_slice(input_tensor, strided_slice_begin, strided_slice_end, strided_slice_strides)
        return micro_input
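
# A minimal usage sketch of _MicroBatch (hypothetical input names; assumes the
# inputs are DTensors whose batch dim is divisible by micro_batch_num):
#
#   splitter = _MicroBatch(micro_batch_num=4)
#   micro_args, micro_kwargs = splitter([hidden_states], {"attention_mask": mask})
#   # micro_args[i] / micro_kwargs[i] hold the i-th micro-batch of every input.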

def send_object_list(obj, dst=0, group=None):
    """
    Send the input Python object to the dst rank. See the pairing sketch after
    recv_object_list below.

    Args:
        obj (Any): The Python object to be sent.
        dst (int, optional): The global rank to send the Python object to.
            Default: ``0``.
        group (str, optional): Communication group. Default: ``None``.
    """
    if group is None:
        group = GlobalComm.WORLD_COMM_GROUP
    if not isinstance(group, str):
        raise TypeError("For 'send_object_list', the argument 'group' must be a string, "
                        f"but got type {type(group)}.")
    if not isinstance(dst, int):
        raise TypeError("For 'send_object_list', the argument 'dst' must be an int.")
    obj_tensor, tensor_size = _object_to_tensor(obj)
    obj_size = Tensor([tensor_size], dtype=mstype.int32)
    # Send the payload size first so the receiver can allocate a matching buffer.
    send(obj_size, dst, group)
    send(obj_tensor, dst, group)


def recv_object_list(recv_obj, src=0, group=None):
    """
    Receive a Python object from the src rank.

    Args:
        recv_obj (list): The list that receives the Python object in place.
        src (int, optional): The global rank to receive the Python object from.
            Default: ``0``.
        group (str, optional): Communication group. Default: ``None``.
    """
    if group is None:
        group = GlobalComm.WORLD_COMM_GROUP
    if not isinstance(group, str):
        raise TypeError("For 'recv_object_list', the argument 'group' must be a string, "
                        f"but got type {type(group)}.")
    if not isinstance(src, int):
        raise TypeError("For 'recv_object_list', the argument 'src' must be an int.")
    # Receive the payload size first, then the serialized object itself.
    obj_size = Tensor([0], dtype=mstype.int32)
    recv(obj_size, src, group)
    size_val = obj_size.item()
    obj_tensor = mint.empty([size_val], dtype=mstype.int8)
    recv(obj_tensor, src, group)
    buf = obj_tensor.asnumpy().tobytes()[:size_val]
    recv_obj.clear()
    # The deserialized payload is expected to be a sequence; its first element
    # is stored into recv_obj.
    recv_obj.append(pickle.Unpickler(io.BytesIO(buf)).load()[0])
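
# A minimal pairing sketch (hypothetical payload; both ranks must pass the same
# group, and the object is wrapped in a list so recv_object_list can index it):
#
#   # on rank 0:
#   send_object_list(["pipeline_meta"], dst=1)
#
#   # on rank 1:
#   received = []
#   recv_object_list(received, src=0)
#   # received[0] == "pipeline_meta"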