# Code Examples
This page provides practical examples of using coola in real-world scenarios.
## Basic Examples
### Comparing Configuration Dictionaries

```python
import yaml

from coola import objects_are_equal

# Load configuration files
with open("config1.yaml") as f:
    config1 = yaml.safe_load(f)
with open("config2.yaml") as f:
    config2 = yaml.safe_load(f)

# Compare configurations
if objects_are_equal(config1, config2, show_difference=True):
    print("Configurations are identical")
else:
    print("Configurations differ")
```
### Validating Model Outputs

```python
import torch

from coola import objects_are_allclose


def test_model_inference():
    model = load_model()
    test_input = torch.randn(1, 3, 224, 224)

    # Get actual output
    actual_output = model(test_input)

    # Load expected output
    expected_output = torch.load("expected_output.pt")

    # Compare with tolerance
    assert objects_are_allclose(
        actual_output, expected_output, atol=1e-5, rtol=1e-4, show_difference=True
    ), "Model output differs from expected"
```
## Machine Learning Examples
### Comparing Training Checkpoints

```python
import torch

from coola import objects_are_equal


def compare_checkpoints(checkpoint1_path, checkpoint2_path):
    """Compare two PyTorch checkpoint files."""
    checkpoint1 = torch.load(checkpoint1_path)
    checkpoint2 = torch.load(checkpoint2_path)

    # Compare model state dicts
    if not objects_are_equal(
        checkpoint1["model_state_dict"], checkpoint2["model_state_dict"]
    ):
        print("Model state dicts differ")
        return False

    # Compare optimizer state dicts
    if not objects_are_equal(
        checkpoint1["optimizer_state_dict"], checkpoint2["optimizer_state_dict"]
    ):
        print("Optimizer state dicts differ")
        return False

    # Compare other metadata
    metadata_keys = ["epoch", "loss", "accuracy"]
    for key in metadata_keys:
        if key in checkpoint1 and key in checkpoint2:
            if checkpoint1[key] != checkpoint2[key]:
                print(f"Metadata '{key}' differs")
                return False

    return True
```
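A minimal usage sketch; the checkpoint file names below are hypothetical:

```python
# Hypothetical checkpoint files for illustration
if compare_checkpoints("checkpoint_a.pt", "checkpoint_b.pt"):
    print("Checkpoints match")
```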
### Validating Data Preprocessing

```python
import numpy as np

from coola import objects_are_allclose


def test_preprocessing_pipeline():
    """Test that preprocessing is deterministic."""
    # Sample data
    raw_data = load_raw_data()

    # Process twice
    processed_1 = preprocessing_pipeline(raw_data, seed=42)
    processed_2 = preprocessing_pipeline(raw_data, seed=42)

    # Should be identical when using the same seed
    assert objects_are_allclose(
        processed_1, processed_2, equal_nan=True
    ), "Preprocessing is not deterministic"
```
### Comparing Model Predictions

```python
import torch

from coola import objects_are_allclose


def compare_model_versions(model_v1, model_v2, test_data):
    """Compare predictions from two model versions."""
    model_v1.eval()
    model_v2.eval()

    differences = []
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_data):
            pred_v1 = model_v1(inputs)
            pred_v2 = model_v2(inputs)

            # Check if predictions are close
            if not objects_are_allclose(pred_v1, pred_v2, atol=1e-4):
                differences.append(
                    {
                        "batch_idx": batch_idx,
                        "max_diff": (pred_v1 - pred_v2).abs().max().item(),
                    }
                )
    return differences
```
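A usage sketch, assuming two loaded models and a test DataLoader; the names `model_a`, `model_b`, and `test_loader` are placeholders:

```python
diffs = compare_model_versions(model_a, model_b, test_loader)
if diffs:
    worst = max(d["max_diff"] for d in diffs)
    print(f"{len(diffs)} batches differ (worst max diff: {worst:.3e})")
else:
    print("All batches match within tolerance")
```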
## Data Science Examples
### Comparing DataFrames

```python
import pandas as pd

from coola import objects_are_equal


def compare_dataframes(df1, df2, ignore_index=False):
    """Compare two pandas DataFrames."""
    if ignore_index:
        df1 = df1.reset_index(drop=True)
        df2 = df2.reset_index(drop=True)
    return objects_are_equal(df1, df2, show_difference=True)


# Example usage
df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
if compare_dataframes(df1, df2):
    print("DataFrames are equal")
```
### Validating Data Transformations

```python
import numpy as np
import pandas as pd

from coola import objects_are_allclose


def test_data_transformation():
    """Test that the data transformation is reversible."""
    original = pd.DataFrame({"values": np.random.randn(1000)})

    # Transform data
    transformed = transform_data(original)

    # Transform back
    restored = inverse_transform_data(transformed)

    # Check if original and restored are close
    assert objects_are_allclose(
        original.values, restored.values, atol=1e-6, rtol=1e-5
    ), "Transformation is not reversible"
```
### Comparing Time Series Data

```python
import numpy as np
import pandas as pd

from coola import objects_are_allclose


def compare_time_series(series1, series2, tolerance=1e-6):
    """Compare two time series with tolerance for floating-point errors."""
    # Ensure same index
    if not series1.index.equals(series2.index):
        print("Time series have different indices")
        return False

    # Compare values with tolerance
    return objects_are_allclose(
        series1.values,
        series2.values,
        atol=tolerance,
        equal_nan=True,  # Treat NaN as equal
    )
```
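For instance, two series that agree up to floating-point noise and share the same NaN positions compare equal:

```python
idx = pd.date_range("2024-01-01", periods=4, freq="D")
s1 = pd.Series([1.0, 2.0, np.nan, 4.0], index=idx)
s2 = pd.Series([1.0, 2.0 + 1e-9, np.nan, 4.0], index=idx)
print(compare_time_series(s1, s2))  # True
```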
## Testing Examples
### pytest Integration

```python
import pytest
import torch

from coola import objects_are_equal, objects_are_allclose


@pytest.fixture
def sample_tensor():
    return torch.randn(10, 10)


def test_tensor_transformation(sample_tensor):
    """Test that the transformation produces the expected output."""
    result = my_transformation(sample_tensor)
    expected = load_expected_result("transformation_output.pt")
    assert objects_are_allclose(result, expected, atol=1e-6, show_difference=True)


def test_data_equality(sample_tensor):
    """Test data equality with coola."""
    processed = process_data(sample_tensor)

    # Expected structure
    expected = {
        "data": torch.zeros(10, 10),
        "metadata": {"shape": (10, 10), "dtype": "float32"},
    }
    assert objects_are_equal(processed, expected, show_difference=True)
```
### unittest Integration

```python
import unittest

import numpy as np

from coola import objects_are_equal, objects_are_allclose


class TestDataProcessing(unittest.TestCase):
    def setUp(self):
        self.test_data = np.random.randn(100, 100)

    def test_data_normalization(self):
        """Test data normalization."""
        normalized = normalize_data(self.test_data)

        # Check that the mean is close to 0 and the std is close to 1
        self.assertTrue(objects_are_allclose(normalized.mean(), 0.0, atol=1e-6))
        self.assertTrue(objects_are_allclose(normalized.std(), 1.0, atol=1e-6))

    def test_data_structure(self):
        """Test that the data structure matches the expected one."""
        result = create_data_structure(self.test_data)
        expected = {
            "data": self.test_data,
            "mean": self.test_data.mean(),
            "std": self.test_data.std(),
        }
        self.assertTrue(objects_are_equal(result, expected, show_difference=True))
```
## Advanced Examples
### Custom Comparator for Custom Classes

```python
from typing import Any

from coola import objects_are_equal
from coola.equality import EqualityConfig
from coola.equality.comparators import BaseEqualityComparator
from coola.equality.testers import EqualityTester


class Vector3D:
    """A simple 3D vector class."""

    def __init__(self, x, y, z):
        self.x = x
        self.y = y
        self.z = z


class Vector3DComparator(BaseEqualityComparator):
    """Custom comparator for Vector3D objects."""

    def clone(self):
        return self.__class__()

    def equal(self, actual: Vector3D, expected: Any, config: EqualityConfig) -> bool:
        if not isinstance(expected, Vector3D):
            if config.show_difference:
                print(f"Types differ: {type(actual)} vs {type(expected)}")
            return False

        # Compare components
        equal = (
            actual.x == expected.x and actual.y == expected.y and actual.z == expected.z
        )
        if not equal and config.show_difference:
            print(
                f"Vectors differ: ({actual.x}, {actual.y}, {actual.z}) "
                f"vs ({expected.x}, {expected.y}, {expected.z})"
            )
        return equal


# Register the comparator on a local copy of the tester
tester = EqualityTester.local_copy()
tester.add_comparator(Vector3D, Vector3DComparator())

# Use it
v1 = Vector3D(1, 2, 3)
v2 = Vector3D(1, 2, 3)
v3 = Vector3D(1, 2, 4)
print(objects_are_equal(v1, v2, tester=tester))  # True
print(objects_are_equal(v1, v3, tester=tester, show_difference=True))  # False
```
### Comparing Nested Complex Structures

```python
import numpy as np
import pandas as pd
import torch

from coola import objects_are_equal


def compare_ml_experiment_results():
    """Build and compare two complete ML experiment results."""
    experiment1 = {
        "config": {
            "learning_rate": 0.001,
            "batch_size": 32,
            "epochs": 100,
            "optimizer": "adam",
        },
        "metrics": {
            "train": pd.DataFrame(
                {"loss": [0.5, 0.4, 0.3], "accuracy": [0.8, 0.85, 0.9]}
            ),
            "val": pd.DataFrame(
                {"loss": [0.6, 0.5, 0.4], "accuracy": [0.75, 0.8, 0.85]}
            ),
        },
        "model_weights": {
            "layer1": torch.randn(100, 50),
            "layer2": torch.randn(50, 10),
        },
        "predictions": {
            "test": np.random.rand(100, 10),
            "metadata": {"num_samples": 100, "num_classes": 10},
        },
    }
    experiment2 = {
        # Same structure, built from copies of experiment1's data
        "config": experiment1["config"].copy(),
        "metrics": {
            "train": experiment1["metrics"]["train"].copy(),
            "val": experiment1["metrics"]["val"].copy(),
        },
        "model_weights": {
            "layer1": experiment1["model_weights"]["layer1"].clone(),
            "layer2": experiment1["model_weights"]["layer2"].clone(),
        },
        "predictions": {
            "test": experiment1["predictions"]["test"].copy(),
            "metadata": experiment1["predictions"]["metadata"].copy(),
        },
    }
    return objects_are_equal(experiment1, experiment2, show_difference=True)
```
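Since `experiment2` is built entirely from copies of `experiment1`, the comparison succeeds:

```python
print(compare_ml_experiment_results())  # True
```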
### Conditional Comparison Based on Type

```python
import numpy as np
import torch

from coola import objects_are_equal, objects_are_allclose


def smart_compare(obj1, obj2, numeric_tolerance=1e-6):
    """Smart comparison that uses the appropriate method based on type."""
    # For numeric types, use allclose
    if isinstance(obj1, (torch.Tensor, np.ndarray)):
        return objects_are_allclose(
            obj1, obj2, atol=numeric_tolerance, rtol=numeric_tolerance
        )

    # For dictionaries, check each value
    if isinstance(obj1, dict) and isinstance(obj2, dict):
        if obj1.keys() != obj2.keys():
            return False
        return all(
            smart_compare(obj1[k], obj2[k], numeric_tolerance) for k in obj1.keys()
        )

    # For lists/tuples, check each element
    if isinstance(obj1, (list, tuple)) and isinstance(obj2, (list, tuple)):
        if len(obj1) != len(obj2):
            return False
        return all(smart_compare(a, b, numeric_tolerance) for a, b in zip(obj1, obj2))

    # For other types, use exact equality
    return objects_are_equal(obj1, obj2)
```
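A quick usage sketch on a nested structure that mixes tensors and plain Python values:

```python
a = {"weights": torch.ones(3), "meta": {"name": "run1", "scores": [1.0, 2.0]}}
b = {"weights": torch.ones(3) + 1e-8, "meta": {"name": "run1", "scores": [1.0, 2.0]}}
print(smart_compare(a, b))  # True: the tensors are compared with tolerance
```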
## Integration Examples
### With Logging

```python
import logging

from coola import objects_are_equal

# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)


def compare_with_logging(obj1, obj2, context=""):
    """Compare objects with detailed logging."""
    logger = logging.getLogger(__name__)
    logger.info(f"Starting comparison{f' for {context}' if context else ''}")

    result = objects_are_equal(obj1, obj2, show_difference=True)

    if result:
        logger.info(f"Objects are equal{f' ({context})' if context else ''}")
    else:
        logger.warning(f"Objects differ{f' ({context})' if context else ''}")
    return result
```
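For example:

```python
compare_with_logging({"a": 1}, {"a": 1}, context="sanity check")  # logs "equal"
compare_with_logging({"a": 1}, {"a": 2}, context="config drift")  # logs a warning
```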
### With Context Managers

```python
import time
from contextlib import contextmanager

from coola import objects_are_equal


@contextmanager
def timed_comparison(description=""):
    """Time the comparison operation."""
    start = time.time()
    print(f"Starting comparison{f': {description}' if description else ''}...")
    try:
        yield
    finally:
        elapsed = time.time() - start
        print(f"Comparison completed in {elapsed:.4f} seconds")


# Usage
with timed_comparison("model checkpoints"):
    result = objects_are_equal(checkpoint1, checkpoint2, show_difference=True)
```
## Best Practices
### 1. Use Appropriate Tolerances for Numerical Data

```python
from coola import objects_are_allclose

# For single-precision floats
objects_are_allclose(float32_data1, float32_data2, atol=1e-6, rtol=1e-5)

# For double-precision floats
objects_are_allclose(float64_data1, float64_data2, atol=1e-12, rtol=1e-10)
```
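A quick sanity check of the tolerance semantics, assuming NumPy-backed data (for NumPy arrays, the comparison follows the `numpy.allclose` criterion, `|actual - expected| <= atol + rtol * |expected|`):

```python
import numpy as np
from coola import objects_are_allclose

a = np.array([1.0])
b = np.array([1.0 + 5e-6])
# 5e-6 <= 1e-6 + 1e-5 * |1.000005|, so the values are considered close
print(objects_are_allclose(a, b, atol=1e-6, rtol=1e-5))  # True
```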
### 2. Handle NaN Values Explicitly

```python
from coola import objects_are_allclose

# Explicitly decide how to handle NaN
result = objects_are_allclose(
    data1,
    data2,
    equal_nan=True,  # or False, depending on requirements
    show_difference=True,
)
```
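By default, NaN does not compare equal to itself; with `equal_nan=True`, matching NaN positions are treated as equal:

```python
import numpy as np
from coola import objects_are_allclose

x = np.array([1.0, np.nan])
print(objects_are_allclose(x, x.copy()))                  # False: NaN != NaN
print(objects_are_allclose(x, x.copy(), equal_nan=True))  # True
```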
### 3. Compare Metadata Before Large Data

```python
from coola import objects_are_equal


def efficient_compare(obj1, obj2):
    # Quick checks first
    if obj1.metadata != obj2.metadata:
        return False
    # Expensive comparison last
    return objects_are_equal(obj1.data, obj2.data)
```
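A sketch of the kind of object this pattern assumes; the `Dataset` class here is hypothetical:

```python
from dataclasses import dataclass

import numpy as np


@dataclass
class Dataset:  # hypothetical container for illustration
    metadata: dict
    data: np.ndarray


d1 = Dataset(metadata={"shape": (1000, 10)}, data=np.zeros((1000, 10)))
d2 = Dataset(metadata={"shape": (1000, 20)}, data=np.zeros((1000, 20)))
print(efficient_compare(d1, d2))  # False, without touching the arrays
```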
## See Also
- Quickstart Guide - Basic usage examples
- FAQ - Frequently asked questions