Bases: `BaseSyntheticDataset`
Generates points in a 2D checkerboard pattern using rejection sampling.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `n_samples` | `int` | Target number of samples. | `2000` |
| `range_limit` | `float` | Defines the square region `[-lim, lim] x [-lim, lim]`. | `4.0` |
| `noise` | `float` | Standard deviation of the Gaussian noise added to each point. | `0.01` |
| `device` | `Optional[Union[str, torch.device]]` | Device for the tensor. | `None` |
| `dtype` | `torch.dtype` | Data type for the tensor. | `torch.float32` |
| `seed` | `Optional[int]` | Random seed for reproducibility. | `None` |
Source code in torchebm/datasets/generators.py
```python
class CheckerboardDataset(BaseSyntheticDataset):
    """
    Generates points in a 2D checkerboard pattern using rejection sampling.

    Args:
        n_samples (int): Target number of samples. Default: 2000.
        range_limit (float): Defines the square region [-lim, lim] x [-lim, lim]. Default: 4.0.
        noise (float): Small Gaussian noise added to points. Default: 0.01.
        device (Optional[Union[str, torch.device]]): Device for the tensor.
        dtype (torch.dtype): Data type for the tensor. Default: torch.float32.
        seed (Optional[int]): Random seed for reproducibility.
    """

    def __init__(
        self,
        n_samples: int = 2000,
        range_limit: float = 4.0,
        noise: float = 0.01,
        device: Optional[Union[str, torch.device]] = None,
        dtype: torch.dtype = torch.float32,
        seed: Optional[int] = None,
    ):
        self.range_limit = range_limit
        self.noise = noise
        super().__init__(n_samples=n_samples, device=device, dtype=dtype, seed=seed)

    def _generate_data(self) -> torch.Tensor:
        # Logic from make_checkerboard
        collected_samples = []
        target = self.n_samples
        # Estimate batch size needed (density is ~0.5)
        batch_size = max(1000, int(target * 2.5))  # Generate more than needed per batch
        while len(collected_samples) < target:
            x = np.random.uniform(-self.range_limit, self.range_limit, size=batch_size)
            y = np.random.uniform(-self.range_limit, self.range_limit, size=batch_size)
            keep = (np.floor(x) + np.floor(y)) % 2 != 0
            valid_points = np.vstack((x[keep], y[keep])).T.astype(np.float32)
            needed = target - len(collected_samples)
            collected_samples.extend(valid_points[:needed])  # Add only needed points
        X = np.array(collected_samples[:target], dtype=np.float32)  # Ensure exact n_samples
        tensor_data = torch.from_numpy(X)
        tensor_data += torch.randn_like(tensor_data) * self.noise
        return tensor_data
```
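A minimal usage sketch. The constructor arguments match the signature above; the `DataLoader` step assumes `BaseSyntheticDataset` implements the standard PyTorch `Dataset` protocol (`__len__`/`__getitem__`), which is not shown in this snippet.

```python
import torch
from torch.utils.data import DataLoader

from torchebm.datasets.generators import CheckerboardDataset

# Constructor arguments taken from the signature documented above.
dataset = CheckerboardDataset(n_samples=2000, range_limit=4.0, noise=0.01, seed=42)

# Assumption: BaseSyntheticDataset follows the torch.utils.data.Dataset
# protocol, so the generated points can be batched with a DataLoader.
loader = DataLoader(dataset, batch_size=256, shuffle=True)
batch = next(iter(loader))
print(batch.shape)  # expected: torch.Size([256, 2]) if __getitem__ yields one 2D point
```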
`range_limit` instance-attribute

```python
range_limit = range_limit
```
`_generate_data`

```python
_generate_data() -> torch.Tensor
```
Source code in torchebm/datasets/generators.py
```python
def _generate_data(self) -> torch.Tensor:
    # Logic from make_checkerboard
    collected_samples = []
    target = self.n_samples
    # Estimate batch size needed (density is ~0.5)
    batch_size = max(1000, int(target * 2.5))  # Generate more than needed per batch
    while len(collected_samples) < target:
        x = np.random.uniform(-self.range_limit, self.range_limit, size=batch_size)
        y = np.random.uniform(-self.range_limit, self.range_limit, size=batch_size)
        keep = (np.floor(x) + np.floor(y)) % 2 != 0
        valid_points = np.vstack((x[keep], y[keep])).T.astype(np.float32)
        needed = target - len(collected_samples)
        collected_samples.extend(valid_points[:needed])  # Add only needed points
    X = np.array(collected_samples[:target], dtype=np.float32)  # Ensure exact n_samples
    tensor_data = torch.from_numpy(X)
    tensor_data += torch.randn_like(tensor_data) * self.noise
    return tensor_data
```
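A note on the acceptance rule above: a point `(x, y)` is kept exactly when `floor(x) + floor(y)` is odd, which selects alternating unit cells. A tiny self-contained check (pure Python, no project code):

```python
# Mark each unit cell [cx, cx+1) x [cy, cy+1) with '#' if the rejection
# sampler would keep points falling in it, '.' otherwise.
for cy in range(3, -5, -1):  # top-to-bottom rows for a readable printout
    print("".join("#" if (cx + cy) % 2 != 0 else "." for cx in range(-4, 4)))
# The output alternates '#' and '.' like a checkerboard, confirming that
# the parity test keeps every other unit square.
```

Since the parity test accepts roughly half of each uniform batch (the "density is ~0.5" noted in the source), the oversampling factor of 2.5 typically fills the target in a single loop iteration.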