Coverage for /home/jenkins/.local/lib/python3.10/site-packages/hyper_parallel/core/distributed_checkpoint/metadata.py

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Checkpoint metadata structures for distributed checkpoint save and load."""
from dataclasses import dataclass, field
from typing import Any, Optional, Union

# String constant "chunk_info"; it is not referenced by the definitions
# visible in this module, so its consumers are presumably elsewhere.
CHUNK_INFO = "chunk_info"

@dataclass(frozen=True)
class MetadataIndex:
    """
    Index to identify a specific piece of data in the checkpoint.

    Instances are immutable (``frozen=True``) and, because every field is
    hashable when ``offset`` holds hashable items, can be used as dict keys.

    Attributes:
        fqn: Fully qualified name of the tensor/object.
        offset: Offset in the tensor (for sharded tensors). Default ().
        index: Index for sharded tensors (None for non-sharded). Default None.
    """
    fqn: str
    # default_factory=tuple produces the empty tuple () when no offset is given.
    offset: tuple = field(default_factory=tuple)
    index: Optional[int] = None

@dataclass(frozen=True)
class ChunkStorageMetadata:
    """
    Metadata for a chunk of storage.

    Represents a portion of a distributed tensor stored in the checkpoint.
    Instances are immutable (``frozen=True``).

    Attributes:
        offsets: Offsets in the global tensor for each dimension.
        sizes: Sizes of the chunk for each dimension.
    """
    offsets: tuple
    sizes: tuple

@dataclass(frozen=True)
class ChunkInfo:
    """
    Info for a tensor chunk.

    Pairs the storage metadata of one chunk of a distributed tensor with a
    ``global_shape`` tuple. Instances are immutable (``frozen=True``).

    Attributes:
        chunk: Storage metadata of the chunk (its per-dimension offsets in
            the global tensor and its per-dimension sizes).
        global_shape: Presumably the shape of the full (global) tensor the
            chunk belongs to; inferred from the field name, confirm
            against callers.
    """
    chunk: ChunkStorageMetadata
    global_shape: tuple

@dataclass(frozen=True)
class TensorProperties:
    """
    Properties of a tensor.

    Instances are immutable (``frozen=True``).

    Attributes:
        dtype: Data type of the tensor (as string).
        requires_grad: Whether the tensor requires gradients. Default False.
        memory_format: Memory format (optional). Default None.
    """
    dtype: str
    requires_grad: bool = False
    memory_format: Optional[str] = None

@dataclass
class BytesStorageMetadata:
    """Metadata for bytes data stored in checkpoint.

    Declares no fields; it acts as a marker type in checkpoint metadata.
    """

@dataclass(frozen=True)
class TensorStorageMetadata:
    """
    Metadata for a distributed tensor.

    Contains properties, global size, and list of chunks stored across ranks.

    Attributes:
        properties: Tensor properties (dtype, etc.).
        size: Global size of the tensor.
        chunks: List of chunks stored in the checkpoint. Default [].
    """
    properties: TensorProperties
    size: tuple
    # NOTE: frozen=True only blocks attribute rebinding; the list itself can
    # still be mutated in place. The generated __hash__ hashes every field,
    # so hash(instance) raises TypeError because a list is unhashable.
    chunks: list[ChunkStorageMetadata] = field(default_factory=list)

@dataclass
class Metadata:
    """
    Global metadata for a checkpoint.

    Contains metadata for all items in the state_dict, along with planner and storage-specific data.

    Attributes:
        state_dict_metadata: Mapping from FQN to storage metadata.
        planner_data: Planner-specific data (optional). Default None.
        storage_data: Storage-specific data (optional). Default None.
        version: Checkpoint format version. Default "1.0".
    """
    state_dict_metadata: dict[str, Union[TensorStorageMetadata, BytesStorageMetadata]]
    planner_data: Any = None  # Planner-specific data (can be any type)
    storage_data: Optional[dict[MetadataIndex, Any]] = None  # Storage mapping: MetadataIndex -> StorageInfo
    version: str = "1.0"

Coverage for / home / jenkins / .local / lib / python3.10 / site-packages / hyper_parallel / core / distributed_checkpoint / metadata.py: 100%

34 statements