iden.shard
iden.shard ¶
Contain shard implementations.
iden.shard.BaseShard ¶
              Bases: Generic[T], ABC
Define the base class to implement a shard.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.io import save_json
>>> from iden.shard import JsonShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("uri/0001").as_uri()
...     file = Path(tmpdir).joinpath("data.json")
...     save_json([1, 2, 3], file)
...     shard = JsonShard(uri=uri, path=file)
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.BaseShard.clear
  
      abstractmethod
  
¶
clear() -> None
Clear the current shard cache, i.e. remove the data from memory if possible.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.io import save_json
>>> from iden.shard import JsonShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("uri/0001").as_uri()
...     file = Path(tmpdir).joinpath("data.json")
...     save_json([1, 2, 3], file)
...     shard = JsonShard(uri=uri, path=file)
...     data = shard.get_data(cache=True)
...     data
...     data.append(4)  # in-place modification
...     data = shard.get_data()
...     data
...     shard.clear()
...     data = shard.get_data()
...     data
...
[1, 2, 3]
[1, 2, 3, 4]
[1, 2, 3]
            iden.shard.BaseShard.equal
  
      abstractmethod
  
¶
equal(other: Any, equal_nan: bool = False) -> bool
Indicate if two shards are equal or not.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| other | Any | The object to compare with. | required | 
| equal_nan | bool | If `True`, then two `NaN`s are considered equal. | False | 
Returns:
| Type | Description | 
|---|---|
| bool | `True` if the two shards are equal, otherwise `False`. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import JsonShard, create_json_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri1 = Path(tmpdir).joinpath("my_uri1").as_uri()
...     uri2 = Path(tmpdir).joinpath("my_uri2").as_uri()
...     shard1 = create_json_shard([1, 2, 3], uri=uri1)
...     shard2 = create_json_shard([4, 5, 6], uri=uri2)
...     shard3 = JsonShard.from_uri(uri=uri1)
...     shard1.equal(shard2)
...     shard1.equal(shard3)
...
False
True
            iden.shard.BaseShard.get_data
  
      abstractmethod
  
¶
get_data(cache: bool = False) -> T
Get the data in the shard.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
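| cache | bool | If `True`, the loaded data are kept in memory (cached) so that later calls can reuse them. If `False`, the data are not cached. | False | 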
Returns:
| Type | Description | 
|---|---|
| T | The data in the shard. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.io import save_json
>>> from iden.shard import JsonShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("uri/0001").as_uri()
...     file = Path(tmpdir).joinpath("data.json")
...     save_json([1, 2, 3], file)
...     shard = JsonShard(uri=uri, path=file)
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.BaseShard.get_uri
  
      abstractmethod
  
¶
get_uri() -> str | None
Get the Uniform Resource Identifier (URI) of the shard.
Returns:
| Type | Description | 
|---|---|
| str | None | The Uniform Resource Identifier (URI). | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.io import save_json
>>> from iden.shard import JsonShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("uri/0001").as_uri()
...     file = Path(tmpdir).joinpath("data.json")
...     save_json([1, 2, 3], file)
...     shard = JsonShard(uri=uri, path=file)
...     shard.get_uri()
...
'file:///.../uri/0001'
            iden.shard.BaseShard.is_cached
  
      abstractmethod
  
¶
is_cached() -> bool
Indicate if the data in the shard are cached or not.
Returns:
| Type | Description | 
|---|---|
| bool | `True` if the data are cached, otherwise `False`. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.io import save_json
>>> from iden.shard import JsonShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("uri/0001").as_uri()
...     file = Path(tmpdir).joinpath("data.json")
...     save_json([1, 2, 3], file)
...     shard = JsonShard(uri=uri, path=file)
...     shard.is_cached()
...     data = shard.get_data(cache=True)
...     shard.is_cached()
...     shard.clear()
...     shard.is_cached()
...
False
True
False
iden.shard.CloudpickleShard ¶
              Bases: FileShard[Any]
Implement a cloudpickle shard.
The data are stored in a cloudpickle file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the cloudpickle file. | required | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `cloudpickle` is not installed. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import CloudpickleShard
>>> from iden.io import save_pickle
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.pkl")
...     save_pickle([1, 2, 3], file)
...     shard = CloudpickleShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.CloudpickleShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the cloudpickle file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import CloudpickleShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.pkl")
...     CloudpickleShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.pkl'},
 'loader': {'_target_': 'iden.shard.loader.CloudpickleShardLoader'}}
iden.shard.FileShard ¶
              Bases: BaseShard[T]
Implement a generic shard where the data are stored in a single file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the file that stores the data. | required | 
| loader | BaseLoader[T] | dict | None | The data loader or its configuration. | None | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import FileShard
>>> from iden.io import save_json, JsonLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.json")
...     save_json([1, 2, 3], file)
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     shard = FileShard(uri=uri, path=file, loader=JsonLoader())
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.FileShard.from_uri
  
      classmethod
  
¶
from_uri(uri: str) -> FileShard
Instantiate a shard from its URI.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The URI. | required | 
Returns:
| Type | Description | 
|---|---|
| FileShard | The instantiated shard. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import FileShard, create_json_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_json_shard([1, 2, 3], uri=uri)
...     shard = FileShard.from_uri(uri)
...     shard
...
FileShard(uri=file:///.../my_uri)
            iden.shard.FileShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the file that stores the data. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import FileShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.json")
...     FileShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.json'},
 'loader': {'_target_': 'iden.shard.loader.FileShardLoader'}}
iden.shard.InMemoryShard ¶
              Bases: BaseShard[Any]
Implement an in-memory shard.
This shard does not have a valid URI because the data are stored in memory.
Example usage:
>>> from iden.shard import InMemoryShard
>>> shard = InMemoryShard([1, 2, 3])
>>> shard.get_data()
[1, 2, 3]
iden.shard.JoblibShard ¶
              Bases: FileShard[Any]
Implement a joblib shard.
The data are stored in a joblib file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the joblib file. | required | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `joblib` is not installed. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import JoblibShard
>>> from iden.io import save_pickle
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.joblib")
...     save_pickle([1, 2, 3], file)
...     shard = JoblibShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.JoblibShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the joblib file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import JoblibShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.joblib")
...     JoblibShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.joblib'},
 'loader': {'_target_': 'iden.shard.loader.JoblibShardLoader'}}
iden.shard.JsonShard ¶
              Bases: FileShard[Any]
Implement a JSON shard.
The data are stored in a JSON file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the JSON file. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import JsonShard
>>> from iden.io import save_json
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.json")
...     save_json([1, 2, 3], file)
...     shard = JsonShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.JsonShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the json file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import JsonShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.json")
...     JsonShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.json'},
 'loader': {'_target_': 'iden.shard.loader.JsonShardLoader'}}
iden.shard.NumpySafetensorsShard ¶
              Bases: FileShard[dict[str, ndarray]]
Implement a safetensors shard for numpy.ndarrays.
The data are stored in a safetensors file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the safetensors file. | required | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `safetensors` or `numpy` is not installed. | 
Example usage:
>>> import tempfile
>>> import numpy as np
>>> from pathlib import Path
>>> from iden.shard import NumpySafetensorsShard
>>> from iden.io.safetensors import NumpySaver
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.safetensors")
...     NumpySaver().save({"key1": np.ones((2, 3)), "key2": np.arange(5)}, file)
...     shard = NumpySafetensorsShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
{'key1': array([[1., 1., 1.], [1., 1., 1.]]), 'key2': array([0, 1, 2, 3, 4])}
            iden.shard.NumpySafetensorsShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the safetensors file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.shard import NumpySafetensorsShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.safetensors")
...     NumpySafetensorsShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.safetensors'},
 'loader': {'_target_': 'iden.shard.loader.NumpySafetensorsShardLoader'}}
iden.shard.PickleShard ¶
              Bases: FileShard[Any]
Implement a pickle shard.
The data are stored in a pickle file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the pickle file. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import PickleShard
>>> from iden.io import save_pickle
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.pkl")
...     save_pickle([1, 2, 3], file)
...     shard = PickleShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.PickleShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the pickle file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import PickleShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.pkl")
...     PickleShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.pkl'},
 'loader': {'_target_': 'iden.shard.loader.PickleShardLoader'}}
iden.shard.ShardDict ¶
              Bases: BaseShard[T]
Implement a data structure to manage a dictionary of shards.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| shards | dict[str, BaseShard[T]] | The dictionary of shards. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.dataset import VanillaDataset
>>> from iden.shard import create_json_shard, ShardDict
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shards/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shards/uri2").as_uri()
...         ),
...     }
...     sd = ShardDict(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards)
...     sd
...
ShardDict(
  (uri): file:///.../uri
  (shards):
    (train): JsonShard(uri=file:///.../shards/uri1)
    (val): JsonShard(uri=file:///.../shards/uri2)
)
            iden.shard.ShardDict.from_uri
  
      classmethod
  
¶
from_uri(uri: str) -> ShardDict[T]
Instantiate a shard from its URI.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The URI. | required | 
Returns:
| Type | Description | 
|---|---|
| ShardDict[T] | The instantiated shard. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import ShardDict, create_json_shard, create_shard_dict
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     uri = Path(tmpdir).joinpath("uri").as_uri()
...     _ = create_shard_dict(shards, uri=uri)
...     shard = ShardDict.from_uri(uri)
...     shard
...
ShardDict(
  (uri): file:///.../uri
  (shards):
    (train): JsonShard(uri=file:///.../shard/uri1)
    (val): JsonShard(uri=file:///.../shard/uri2)
)
            iden.shard.ShardDict.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(
    shards: dict[str, BaseShard[T]],
) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | dict[str, BaseShard[T]] | The shards. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import ShardDict, create_json_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     ShardDict.generate_uri_config(shards)
...
{'shards': {'train': 'file:///.../shard/uri1', 'val': 'file:///.../shard/uri2'},
 'loader': {'_target_': 'iden.shard.loader.ShardDictLoader'}}
iden.shard.ShardDict.get_shard ¶
get_shard(shard_id: str) -> Any
Get a shard.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shard_id | str | The shard ID. | required | 
Returns:
| Type | Description | 
|---|---|
| Any | The shard. | 
Raises:
| Type | Description | 
|---|---|
| ShardNotFoundError | if the shard does not exist. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, ShardDict
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     sd = ShardDict(uri=Path(tmpdir).joinpath("main_uri").as_uri(), shards=shards)
...     sd.get_shard("train")
...
JsonShard(uri=file:///.../uri1)
iden.shard.ShardDict.get_shard_ids ¶
get_shard_ids() -> set[str]
Get the shard IDs.
Returns:
| Type | Description | 
|---|---|
| set[str] | The shard IDs. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, ShardDict
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     sd = ShardDict(uri=Path(tmpdir).joinpath("main_uri").as_uri(), shards=shards)
...     sorted(sd.get_shard_ids())
...
['train', 'val']
iden.shard.ShardDict.has_shard ¶
has_shard(shard_id: str) -> bool
Indicate if the shard exists or not.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shard_id | str | The shard ID. | required | 
Returns:
| Type | Description | 
|---|---|
| bool | `True` if the shard exists, otherwise `False`. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, ShardDict
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     sd = ShardDict(uri=Path(tmpdir).joinpath("main_uri").as_uri(), shards=shards)
...     sd.has_shard("train")
...     sd.has_shard("test")
...
True
False
iden.shard.ShardTuple ¶
              Bases: BaseShard[tuple[BaseShard[T], ...]]
Implement a data structure to manage a tuple of shards.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| shards | Iterable[BaseShard[T]] | The tuple of shards. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard
>>> from iden.shard import ShardTuple
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = [
...         create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("shards/uri1").as_uri()),
...         create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shards/uri2").as_uri()
...         ),
...     ]
...     sl = ShardTuple(uri=Path(tmpdir).joinpath("uri").as_uri(), shards=shards)
...     sl
...
ShardTuple(
  (uri): file:///.../uri
  (shards):
    (0): JsonShard(uri=file:///.../shards/uri1)
    (1): JsonShard(uri=file:///.../shards/uri2)
)
            iden.shard.ShardTuple.from_uri
  
      classmethod
  
¶
from_uri(uri: str) -> ShardTuple[T]
Instantiate a shard from its URI.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The URI. | required | 
Returns:
| Type | Description | 
|---|---|
| ShardTuple[T] | The instantiated shard. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import ShardTuple, create_json_shard, create_shard_tuple
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = [
...         create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()),
...         create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     ]
...     uri = Path(tmpdir).joinpath("uri").as_uri()
...     _ = create_shard_tuple(shards, uri=uri)
...     shard = ShardTuple.from_uri(uri)
...     shard
...
ShardTuple(
  (uri): file:///.../uri
  (shards):
    (0): JsonShard(uri=file:///.../shard/uri1)
    (1): JsonShard(uri=file:///.../shard/uri2)
)
            iden.shard.ShardTuple.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(
    shards: Iterable[BaseShard[T]],
) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | Iterable[BaseShard[T]] | The shards. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import ShardTuple, create_json_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = [
...         create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()),
...         create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     ]
...     ShardTuple.generate_uri_config(shards)
...
{'shards': ['file:///.../shard/uri1', 'file:///.../shard/uri2'],
 'loader': {'_target_': 'iden.shard.loader.ShardTupleLoader'}}
iden.shard.ShardTuple.get ¶
get(index: int) -> BaseShard[T]
Get a shard.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| index | int | The shard index to get. | required | 
Returns:
| Type | Description | 
|---|---|
| BaseShard[T] | The shard. | 
Raises:
| Type | Description | 
|---|---|
| IndexError | if the index is outside the tuple range. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard
>>> from iden.shard import ShardTuple
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = [
...         create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()),
...         create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     ]
...     sl = ShardTuple(uri=Path(tmpdir).joinpath("main_uri").as_uri(), shards=shards)
...     sl.get(0)
...
JsonShard(uri=file:///.../uri1)
iden.shard.ShardTuple.is_sorted_by_uri ¶
is_sorted_by_uri() -> bool
Indicate if the shards are sorted by ascending order of URIs or not.
Returns:
| Type | Description | 
|---|---|
| bool | `True` if the shards are sorted by ascending order of URIs, otherwise `False`. | 
iden.shard.TorchSafetensorsShard ¶
              Bases: FileShard[dict[str, Tensor]]
Implement a safetensors shard for torch.Tensors.
The data are stored in a safetensors file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the safetensors file. | required | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `safetensors` or `torch` is not installed. | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.shard import TorchSafetensorsShard
>>> from iden.io.safetensors import TorchSaver
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.safetensors")
...     TorchSaver().save({"key1": torch.ones(2, 3), "key2": torch.arange(5)}, file)
...     shard = TorchSafetensorsShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
{'key1': tensor([[1., 1., 1.], [1., 1., 1.]]), 'key2': tensor([0, 1, 2, 3, 4])}
            iden.shard.TorchSafetensorsShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the safetensors file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.shard import TorchSafetensorsShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.safetensors")
...     TorchSafetensorsShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.safetensors'},
 'loader': {'_target_': 'iden.shard.loader.TorchSafetensorsShardLoader'}}
iden.shard.TorchShard ¶
              Bases: FileShard[Any]
Implement a PyTorch shard for torch.Tensors.
The data are stored in a PyTorch file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the PyTorch file. | required | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `torch` is not installed. | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.shard import TorchShard
>>> from iden.io import TorchSaver
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.pt")
...     TorchSaver().save({"key1": torch.ones(2, 3), "key2": torch.arange(5)}, file)
...     shard = TorchShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
{'key1': tensor([[1., 1., 1.], [1., 1., 1.]]), 'key2': tensor([0, 1, 2, 3, 4])}
            iden.shard.TorchShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the JSON format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the PyTorch file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import TorchShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.pt")
...     TorchShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.pt'},
 'loader': {'_target_': 'iden.shard.loader.TorchShardLoader'}}
iden.shard.YamlShard ¶
              Bases: FileShard[Any]
Implement a YAML shard.
The data are stored in a YAML file.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The shard's URI. | required | 
| path | Path | str | The path to the YAML file. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import YamlShard
>>> from iden.io import save_yaml
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.yaml")
...     save_yaml([1, 2, 3], file)
...     shard = YamlShard(uri="file:///data/1234456789", path=file)
...     shard.get_data()
...
[1, 2, 3]
            iden.shard.YamlShard.generate_uri_config
  
      classmethod
  
¶
generate_uri_config(path: Path) -> dict
Generate the minimal config that is used to load the shard from its URI.
The config must be compatible with the YAML format.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| path | Path | The path to the yaml file. | required | 
Returns:
| Type | Description | 
|---|---|
| dict | The minimal config to load the shard from its URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import YamlShard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     file = Path(tmpdir).joinpath("data.yaml")
...     YamlShard.generate_uri_config(file)
...
{'kwargs': {'path': '.../data.yaml'},
 'loader': {'_target_': 'iden.shard.loader.YamlShardLoader'}}
iden.shard.create_cloudpickle_shard ¶
create_cloudpickle_shard(
    data: Any, uri: str, path: Path | None = None
) -> CloudpickleShard
Create a CloudpickleShard from data.
Note
It is a utility function to create a CloudpickleShard from its
    data and URI. It is possible to create a CloudpickleShard
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | Any | The data to save in the cloudpickle file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the cloudpickle file. If `None`, a path is chosen automatically. | None | 
Returns:
| Type | Description | 
|---|---|
| CloudpickleShard | The created `CloudpickleShard` object. | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `cloudpickle` is not installed. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_cloudpickle_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_cloudpickle_shard([1, 2, 3], uri=Path(tmpdir).joinpath("my_uri").as_uri())
...     shard.get_data()
...
[1, 2, 3]
iden.shard.create_joblib_shard ¶
create_joblib_shard(
    data: Any, uri: str, path: Path | None = None
) -> JoblibShard
Create a JoblibShard from data.
Note
It is a utility function to create a JoblibShard from its
    data and URI. It is possible to create a JoblibShard
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | Any | The data to save in the joblib file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the joblib file. If  | None | 
Returns:
| Type | Description | 
|---|---|
| JoblibShard | The  | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if  | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_joblib_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_joblib_shard([1, 2, 3], uri=Path(tmpdir).joinpath("my_uri").as_uri())
...     shard.get_data()
...
[1, 2, 3]
iden.shard.create_json_shard ¶
create_json_shard(
    data: Any, uri: str, path: Path | None = None
) -> JsonShard
Create a JsonShard from data.
Note
It is a utility function to create a JsonShard from its
    data and URI. It is possible to create a JsonShard
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | Any | The data to save in the json file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the JSON file. If  | None | 
Returns:
| Type | Description | 
|---|---|
| JsonShard | The  | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("my_uri").as_uri())
...     shard.get_data()
...
[1, 2, 3]
iden.shard.create_numpy_safetensors_shard ¶
create_numpy_safetensors_shard(
    data: dict[str, ndarray],
    uri: str,
    path: Path | None = None,
) -> NumpySafetensorsShard
Create a NumpySafetensorsShard from data.
Note
It is a utility function to create a NumpySafetensorsShard
    from its data and URI. It is possible to create a
    NumpySafetensorsShard in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | dict[str, ndarray] | The data to save in the safetensors file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the safetensors file. If  | None | 
Returns:
| Type | Description | 
|---|---|
| NumpySafetensorsShard | The  | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if  | 
Example usage:
>>> import tempfile
>>> import numpy as np
>>> from pathlib import Path
>>> from iden.shard import create_numpy_safetensors_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_numpy_safetensors_shard(
...         data={"key1": np.ones((2, 3)), "key2": np.arange(5)},
...         uri=Path(tmpdir).joinpath("my_uri").as_uri(),
...     )
...     shard.get_data()
...
{'key1': array([[1., 1., 1.], [1., 1., 1.]]), 'key2': array([0, 1, 2, 3, 4])}
iden.shard.create_pickle_shard ¶
create_pickle_shard(
    data: Any, uri: str, path: Path | None = None
) -> PickleShard
Create a PickleShard from data.
Note
It is a utility function to create a PickleShard from its
    data and URI. It is possible to create a PickleShard
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | Any | The data to save in the pickle file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the pickle file. If  | None | 
Returns:
| Type | Description | 
|---|---|
| PickleShard | The  | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_pickle_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_pickle_shard([1, 2, 3], uri=Path(tmpdir).joinpath("my_uri").as_uri())
...     shard.get_data()
...
[1, 2, 3]
iden.shard.create_shard_dict ¶
Create a ShardDict from a dictionary of shards.
Note
It is a utility function to create a ShardDict from its
    shards and URI. It is possible to create a ShardDict
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | dict[str, BaseShard[T]] | The shards. | required | 
| uri | str | The shard's URI. | required | 
Returns:
| Type | Description | 
|---|---|
| ShardDict[T] | The  | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import ShardDict, create_json_shard, create_shard_dict
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     shard = create_shard_dict(shards, uri=Path(tmpdir).joinpath("uri").as_uri())
...     shard
...
ShardDict(
  (uri): file:///.../uri
  (shards):
    (train): JsonShard(uri=file:///.../shard/uri1)
    (val): JsonShard(uri=file:///.../shard/uri2)
)
iden.shard.create_shard_tuple ¶
create_shard_tuple(
    shards: Iterable[BaseShard[T]], uri: str
) -> ShardTuple[T]
Create a ShardTuple from an iterable of shards.
Note
It is a utility function to create a ShardTuple from its
    shards and URI. It is possible to create a ShardTuple
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | Iterable[BaseShard[T]] | The shards. | required | 
| uri | str | The shard's URI. | required | 
Returns:
| Type | Description | 
|---|---|
| ShardTuple[T] | The  | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import ShardTuple, create_json_shard, create_shard_tuple
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = [
...         create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()),
...         create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     ]
...     shard = create_shard_tuple(shards, uri=Path(tmpdir).joinpath("uri").as_uri())
...     shard
...
ShardTuple(
  (uri): file:///.../uri
  (shards):
    (0): JsonShard(uri=file:///.../shard/uri1)
    (1): JsonShard(uri=file:///.../shard/uri2)
)
iden.shard.create_torch_safetensors_shard ¶
create_torch_safetensors_shard(
    data: dict[str, Tensor],
    uri: str,
    path: Path | None = None,
) -> TorchSafetensorsShard
Create a TorchSafetensorsShard from data.
Note
It is a utility function to create a TorchSafetensorsShard
    from its data and URI. It is possible to create a
    TorchSafetensorsShard in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | dict[str, Tensor] | The data to save in the safetensors file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the safetensors file. If  | None | 
Returns:
| Type | Description | 
|---|---|
| TorchSafetensorsShard | The  | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if  | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.shard import create_torch_safetensors_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_torch_safetensors_shard(
...         data={"key1": torch.ones(2, 3), "key2": torch.arange(5)},
...         uri=Path(tmpdir).joinpath("my_uri").as_uri(),
...     )
...     shard.get_data()
...
{'key1': tensor([[1., 1., 1.], [1., 1., 1.]]), 'key2': tensor([0, 1, 2, 3, 4])}
iden.shard.create_torch_shard ¶
create_torch_shard(
    data: Any, uri: str, path: Path | None = None
) -> TorchShard
Create a TorchShard from data.
Note
It is a utility function to create a TorchShard from its
    data and URI. It is possible to create a TorchShard
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | Any | The data to save in the PyTorch file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the PyTorch file. If  | None | 
Returns:
| Type | Description | 
|---|---|
| TorchShard | The  | 
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if  | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> import torch
>>> from iden.shard import create_torch_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_torch_shard(
...         data={"key1": torch.ones(2, 3), "key2": torch.arange(5)},
...         uri=Path(tmpdir).joinpath("my_uri").as_uri(),
...     )
...     shard.get_data()
...
{'key1': tensor([[1., 1., 1.], [1., 1., 1.]]), 'key2': tensor([0, 1, 2, 3, 4])}
iden.shard.create_yaml_shard ¶
create_yaml_shard(
    data: Any, uri: str, path: Path | None = None
) -> YamlShard
Create a YamlShard from data.
Note
It is a utility function to create a YamlShard from its
    data and URI. It is possible to create a YamlShard
    in other ways.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | Any | The data to save in the yaml file. | required | 
| uri | str | The shard's URI. | required | 
| path | Path | None | The path to the YAML file. If  | None | 
Returns:
| Type | Description | 
|---|---|
| YamlShard | The  | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_yaml_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shard = create_yaml_shard([1, 2, 3], uri=Path(tmpdir).joinpath("my_uri").as_uri())
...     shard.get_data()
...
[1, 2, 3]
iden.shard.get_dict_uris ¶
get_dict_uris(
    shards: dict[str, BaseShard],
) -> dict[str, str]
Get the dictionary of shard's URI.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | dict[str, BaseShard] | The dictionary of shards. | required | 
Returns:
| Type | Description | 
|---|---|
| dict[str, str] | The dictionary of shard's URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, get_dict_uris
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     get_dict_uris(shards)
...
{'train': 'file:///.../shard/uri1', 'val': 'file:///.../shard/uri2'}
iden.shard.get_list_uris ¶
get_list_uris(shards: Iterable[BaseShard]) -> list[str]
Get the list of shard's URI.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | Iterable[BaseShard] | The shards. | required | 
Returns:
| Type | Description | 
|---|---|
| list[str] | The list of shard's URI. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import get_list_uris, create_json_shard
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = [
...         create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()),
...         create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     ]
...     get_list_uris(shards)
...
['file:///.../shard/uri1', 'file:///.../shard/uri2']
iden.shard.load_from_uri ¶
load_from_uri(uri: str) -> BaseShard
Load a shard from its Uniform Resource Identifier (URI).
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The URI of the shard. | required | 
Returns:
| Type | Description | 
|---|---|
| BaseShard | The shard associated to the URI. | 
Raises:
| Type | Description | 
|---|---|
| FileNotFoundError | if the URI file does not exist. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, load_from_uri
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_json_shard([1, 2, 3], uri=uri)
...     shard = load_from_uri(uri)
...     shard
...
JsonShard(uri=file:///.../my_uri)
iden.shard.sort_by_uri ¶
Sort a sequence of shards by their URIs.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | Iterable[BaseShard] | The shards to sort. | required | 
| reverse | bool | If set to  | False | 
Returns:
| Type | Description | 
|---|---|
| list[BaseShard] | The sorted shards. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, sort_by_uri
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     shards = sort_by_uri(
...         [
...             create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("uri2").as_uri()),
...             create_json_shard([4, 5, 6, 7], uri=Path(tmpdir).joinpath("uri3").as_uri()),
...             create_json_shard([4, 5, 6, 7], uri=Path(tmpdir).joinpath("uri1").as_uri()),
...         ]
...     )
...     shards
...
[JsonShard(uri=file:///.../uri1), JsonShard(uri=file:///.../uri2), JsonShard(uri=file:///.../uri3)]
iden.shard.generator ¶
Contain shard generator implementations.
iden.shard.generator.BaseShardGenerator ¶
              Bases: Generic[T], ABC
Define the base class to create a shard.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import JsonShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = JsonShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
JsonShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
JsonShard(uri=file:///.../uri/shard1)
            iden.shard.generator.BaseShardGenerator.generate
  
      abstractmethod
  
¶
generate(shard_id: str) -> BaseShard[T]
Generate a shard.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shard_id | str | The shard ID. | required | 
Returns:
| Type | Description | 
|---|---|
| BaseShard[T] | The generated shard. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import JsonShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = JsonShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     shard = generator.generate("shard1")
...     shard
...
JsonShard(uri=file:///.../uri/shard1)
iden.shard.generator.CloudpickleShardGenerator ¶
              Bases: BaseFileShardGenerator[T]
Implement a cloudpickle shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[T] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import CloudpickleShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = CloudpickleShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
CloudpickleShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
CloudpickleShard(uri=file:///.../uri/shard1)
iden.shard.generator.JoblibShardGenerator ¶
              Bases: BaseFileShardGenerator[T]
Implement a joblib shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[T] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import JoblibShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = JoblibShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
JoblibShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
JoblibShard(uri=file:///.../uri/shard1)
iden.shard.generator.JsonShardGenerator ¶
              Bases: BaseFileShardGenerator[T]
Implement a JSON shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[T] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import JsonShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = JsonShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
JsonShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
JsonShard(uri=file:///.../uri/shard1)
iden.shard.generator.NumpySafetensorsShardGenerator ¶
              Bases: BaseFileShardGenerator[dict[str, ndarray]]
Implement a safetensors shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[dict[str, ndarray]] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> import numpy as np
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import NumpySafetensorsShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = NumpySafetensorsShardGenerator(
...         data=DataGenerator({"key1": np.ones((2, 3)), "key2": np.arange(5)}),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
NumpySafetensorsShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
NumpySafetensorsShard(uri=file:///.../uri/shard1)
iden.shard.generator.PickleShardGenerator ¶
              Bases: BaseFileShardGenerator[T]
Implement a pickle shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[T] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import PickleShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = PickleShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
PickleShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
PickleShard(uri=file:///.../uri/shard1)
iden.shard.generator.ShardDictGenerator ¶
              Bases: BaseShardGenerator[dict[str, BaseShard]]
Implement a ShardDict generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shards | dict[str, BaseShardGenerator | dict] | The shard generators or their configurations. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import ShardDictGenerator, JsonShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = ShardDictGenerator(
...         shards={
...             "train": JsonShardGenerator(
...                 data=DataGenerator([1, 2, 3]),
...                 path_uri=Path(tmpdir).joinpath("uri"),
...                 path_shard=Path(tmpdir).joinpath("data"),
...             )
...         },
...         path_uri=Path(tmpdir).joinpath("uri"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
ShardDictGenerator(
  (path_uri): PosixPath('/.../uri')
  (shards):
    (train): JsonShardGenerator(
        (path_uri): PosixPath('/.../uri')
        (path_shard): PosixPath('/.../data')
        (data): DataGenerator(copy=False)
      )
)
ShardDict(
  (uri): file:///.../uri/shard1
  (shards):
    (train): JsonShard(uri=file:///.../uri/train)
)
iden.shard.generator.ShardTupleGenerator ¶
              Bases: BaseShardGenerator[tuple[BaseShard[T], ...]]
Implement a ShardTuple generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shard | BaseShardGenerator[T] | dict | The shard generator or its configuration. | required | 
| num_shards | int | The number of shards to generate in the ShardTuple. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import ShardTupleGenerator, JsonShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = ShardTupleGenerator(
...         shard=JsonShardGenerator(
...             data=DataGenerator([1, 2, 3]),
...             path_uri=Path(tmpdir).joinpath("uri"),
...             path_shard=Path(tmpdir).joinpath("data"),
...         ),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         num_shards=5,
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
ShardTupleGenerator(
  (path_uri): PosixPath('/.../uri')
  (num_shards): 5
  (shard): JsonShardGenerator(
      (path_uri): PosixPath('/.../uri')
      (path_shard): PosixPath('/.../data')
      (data): DataGenerator(copy=False)
    )
)
ShardTuple(
  (uri): file:///.../uri/shard1
  (shards):
    (0): JsonShard(uri=file:///.../uri/000000001)
    (1): JsonShard(uri=file:///.../uri/000000002)
    (2): JsonShard(uri=file:///.../uri/000000003)
    (3): JsonShard(uri=file:///.../uri/000000004)
    (4): JsonShard(uri=file:///.../uri/000000005)
)
iden.shard.generator.TorchSafetensorsShardGenerator ¶
              Bases: BaseFileShardGenerator[dict[str, Tensor]]
Implement a safetensors shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[dict[str, Tensor]] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import TorchSafetensorsShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = TorchSafetensorsShardGenerator(
...         data=DataGenerator({"key1": torch.ones(2, 3), "key2": torch.arange(5)}),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
TorchSafetensorsShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
TorchSafetensorsShard(uri=file:///.../uri/shard1)
iden.shard.generator.TorchShardGenerator ¶
              Bases: BaseFileShardGenerator[T]
Implement a torch shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[T] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import TorchShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = TorchShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
TorchShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
TorchShard(uri=file:///.../uri/shard1)
iden.shard.generator.YamlShardGenerator ¶
              Bases: BaseFileShardGenerator[T]
Implement a YAML shard generator.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| data | BaseDataGenerator[T] | dict | The data to save in the shard. | required | 
| path_uri | Path | The path where to save the URI file. | required | 
| path_shard | Path | The path where to save the shard data. | required | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.data.generator import DataGenerator
>>> from iden.shard.generator import YamlShardGenerator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = YamlShardGenerator(
...         data=DataGenerator([1, 2, 3]),
...         path_uri=Path(tmpdir).joinpath("uri"),
...         path_shard=Path(tmpdir).joinpath("data"),
...     )
...     generator
...     shard = generator.generate("shard1")
...     shard
...
YamlShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): DataGenerator(copy=False)
)
YamlShard(uri=file:///.../uri/shard1)
iden.shard.generator.is_shard_generator_config ¶
is_shard_generator_config(config: dict) -> bool
Indicate if the input configuration is a configuration for a
BaseShardGenerator.
This function only checks if the value of the key  _target_
is valid. It does not check the other values. If _target_
indicates a function, the returned type hint is used to check
the class.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| config | dict | The configuration to check. | required | 
Returns:
| Type | Description | 
|---|---|
| bool | 
 | 
Example usage:
>>> from iden.shard.generator import is_shard_generator_config
>>> is_shard_generator_config({"_target_": "iden.shard.generator.JsonShardGenerator"})
True
iden.shard.generator.setup_shard_generator ¶
setup_shard_generator(
    shard_generator: BaseShardGenerator[T] | dict,
) -> BaseShardGenerator[T]
Set up a shard generator.
The shard generator is instantiated from its configuration by using the
BaseShardGenerator factory function.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shard_generator | BaseShardGenerator[T] | dict | The shard generator or its configuration. | required | 
Returns:
| Type | Description | 
|---|---|
| BaseShardGenerator[T] | The instantiated shard generator. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard.generator import setup_shard_generator
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     generator = setup_shard_generator(
...         {
...             "_target_": "iden.shard.generator.JsonShardGenerator",
...             "data": [1, 2, 3],
...             "path_uri": Path(tmpdir).joinpath("uri"),
...             "path_shard": Path(tmpdir).joinpath("data"),
...         }
...     )
...     generator
...
JsonShardGenerator(
  (path_uri): PosixPath('/.../uri')
  (path_shard): PosixPath('/.../data')
  (data): [1, 2, 3]
)
iden.shard.loader ¶
Contain shard loader implementations.
iden.shard.loader.BaseShardLoader ¶
              Bases: Generic[T], ABC
Define the base class to implement a shard loader.
A shard loader object loads a BaseShard object from
its Uniform Resource Identifier (URI).
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard
>>> from iden.shard.loader import JsonShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_json_shard([1, 2, 3], uri=uri)
...     loader = JsonShardLoader()
...     loader
...
JsonShardLoader()
            iden.shard.loader.BaseShardLoader.load
  
      abstractmethod
  
¶
load(uri: str) -> BaseShard[T]
Load a shard from its Uniform Resource Identifier (URI).
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| uri | str | The URI of the shard to load. | required | 
Returns:
| Type | Description | 
|---|---|
| BaseShard[T] | The loaded shard. | 
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard
>>> from iden.shard.loader import JsonShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_json_shard([1, 2, 3], uri=uri)
...     loader = JsonShardLoader()
...     shard = loader.load(uri)
...     shard
...
JsonShard(uri=file:///.../my_uri)
iden.shard.loader.CloudpickleShardLoader ¶
              Bases: BaseShardLoader[Any]
Implement a cloudpickle shard loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_cloudpickle_shard
>>> from iden.shard.loader import CloudpickleShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_cloudpickle_shard([1, 2, 3], uri=uri)
...     loader = CloudpickleShardLoader()
...     shard = loader.load(uri)
...     shard
...
CloudpickleShard(uri=file:///.../my_uri)
iden.shard.loader.FileShardLoader ¶
              Bases: BaseShardLoader[Any]
Implement a file-based shard loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard
>>> from iden.shard.loader import FileShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_json_shard([1, 2, 3], uri=uri)
...     loader = FileShardLoader()
...     shard = loader.load(uri)
...     shard
...
FileShard(uri=file:///.../my_uri)
iden.shard.loader.JoblibShardLoader ¶
              Bases: BaseShardLoader[Any]
Implement a joblib shard loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_joblib_shard
>>> from iden.shard.loader import JoblibShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_joblib_shard([1, 2, 3], uri=uri)
...     loader = JoblibShardLoader()
...     shard = loader.load(uri)
...     shard
...
JoblibShard(uri=file:///.../my_uri)
iden.shard.loader.JsonShardLoader ¶
              Bases: BaseShardLoader[Any]
Implement a JSON shard loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard
>>> from iden.shard.loader import JsonShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_json_shard([1, 2, 3], uri=uri)
...     loader = JsonShardLoader()
...     shard = loader.load(uri)
...     shard
...
JsonShard(uri=file:///.../my_uri)
iden.shard.loader.NumpySafetensorsShardLoader ¶
              Bases: BaseShardLoader[dict[str, ndarray]]
Implement a safetensors shard loader for numpy.ndarrays.
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `safetensors` or `numpy` is not installed. |
Example usage:
>>> import tempfile
>>> import numpy as np
>>> from pathlib import Path
>>> from iden.shard import create_numpy_safetensors_shard
>>> from iden.shard.loader import NumpySafetensorsShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_numpy_safetensors_shard(
...         {"key1": np.ones((2, 3)), "key2": np.arange(5)}, uri=uri
...     )
...     loader = NumpySafetensorsShardLoader()
...     shard = loader.load(uri)
...     shard
...
NumpySafetensorsShard(uri=file:///.../my_uri)
iden.shard.loader.PickleShardLoader ¶
              Bases: BaseShardLoader[Any]
Implement a pickle shard loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_pickle_shard
>>> from iden.shard.loader import PickleShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_pickle_shard([1, 2, 3], uri=uri)
...     loader = PickleShardLoader()
...     shard = loader.load(uri)
...     shard
...
PickleShard(uri=file:///.../my_uri)
iden.shard.loader.ShardDictLoader ¶
              Bases: BaseShardLoader[dict[str, BaseShard]]
Implement a ShardDict loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, create_shard_dict
>>> from iden.shard.loader import ShardDictLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("uri").as_uri()
...     shards = {
...         "train": create_json_shard(
...             [1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()
...         ),
...         "val": create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     }
...     _ = create_shard_dict(shards, uri=uri)
...     loader = ShardDictLoader()
...     shard = loader.load(uri)
...     shard
...
ShardDict(
  (uri): file:///.../uri
  (shards):
    (train): JsonShard(uri=file:///.../shard/uri1)
    (val): JsonShard(uri=file:///.../shard/uri2)
)
iden.shard.loader.ShardTupleLoader ¶
              Bases: BaseShardLoader[tuple[BaseShard, ...]]
Implement a ShardTuple loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_json_shard, create_shard_tuple
>>> from iden.shard.loader import ShardTupleLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("uri").as_uri()
...     shards = [
...         create_json_shard([1, 2, 3], uri=Path(tmpdir).joinpath("shard/uri1").as_uri()),
...         create_json_shard(
...             [4, 5, 6, 7], uri=Path(tmpdir).joinpath("shard/uri2").as_uri()
...         ),
...     ]
...     _ = create_shard_tuple(shards, uri=uri)
...     loader = ShardTupleLoader()
...     shard = loader.load(uri)
...     shard
...
ShardTuple(
  (uri): file:///.../uri
  (shards):
    (0): JsonShard(uri=file:///.../shard/uri1)
    (1): JsonShard(uri=file:///.../shard/uri2)
)
iden.shard.loader.TorchSafetensorsShardLoader ¶
              Bases: BaseShardLoader[dict[str, Tensor]]
Implement a safetensors shard loader for torch.Tensors.
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `safetensors` or `torch` is not installed. |
Example usage:
>>> import tempfile
>>> import torch
>>> from pathlib import Path
>>> from iden.shard import create_torch_safetensors_shard
>>> from iden.shard.loader import TorchSafetensorsShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_torch_safetensors_shard(
...         {"key1": torch.ones(2, 3), "key2": torch.arange(5)}, uri=uri
...     )
...     loader = TorchSafetensorsShardLoader()
...     shard = loader.load(uri)
...     shard
...
TorchSafetensorsShard(uri=file:///.../my_uri)
iden.shard.loader.TorchShardLoader ¶
              Bases: BaseShardLoader[Any]
Implement a PyTorch shard loader.
Raises:
| Type | Description | 
|---|---|
| RuntimeError | if `torch` is not installed. |
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_torch_shard
>>> from iden.shard.loader import TorchShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_torch_shard([1, 2, 3], uri=uri)
...     loader = TorchShardLoader()
...     shard = loader.load(uri)
...     shard
...
TorchShard(uri=file:///.../my_uri)
iden.shard.loader.YamlShardLoader ¶
              Bases: BaseShardLoader[Any]
Implement a YAML shard loader.
Example usage:
>>> import tempfile
>>> from pathlib import Path
>>> from iden.shard import create_yaml_shard
>>> from iden.shard.loader import YamlShardLoader
>>> with tempfile.TemporaryDirectory() as tmpdir:
...     uri = Path(tmpdir).joinpath("my_uri").as_uri()
...     _ = create_yaml_shard([1, 2, 3], uri=uri)
...     loader = YamlShardLoader()
...     shard = loader.load(uri)
...     shard
...
YamlShard(uri=file:///.../my_uri)
iden.shard.loader.is_shard_loader_config ¶
is_shard_loader_config(config: dict) -> bool
Indicate if the input configuration is a configuration for a
BaseShardLoader.
This function only checks if the value of the key `_target_`
is valid. It does not check the other values. If `_target_`
indicates a function, the returned type hint is used to check
the class.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| config | dict | The configuration to check. | required | 
Returns:
| Type | Description | 
|---|---|
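| bool | `True` if the input configuration is a configuration for a `BaseShardLoader` object, otherwise `False`. |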
Example usage:
>>> from iden.shard.loader import is_shard_loader_config
>>> is_shard_loader_config({"_target_": "iden.shard.loader.JsonShardLoader"})
True
iden.shard.loader.setup_shard_loader ¶
setup_shard_loader(
    shard_loader: BaseShardLoader | dict,
) -> BaseShardLoader
Set up a shard loader.
The shard loader is instantiated from its configuration by using the
BaseShardLoader factory function.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| shard_loader | BaseShardLoader | dict | The shard loader or its configuration. | required | 
Returns:
| Type | Description | 
|---|---|
| BaseShardLoader | The instantiated shard loader. | 
Example usage:
>>> from iden.shard.loader import setup_shard_loader
>>> shard_loader = setup_shard_loader({"_target_": "iden.shard.loader.JsonShardLoader"})
>>> shard_loader
JsonShardLoader()