Skip to content

TwoMoonsDataset

Methods and Attributes

Bases: BaseSyntheticDataset

Generates the 'two moons' dataset.

Creates two interleaving half-circles with added Gaussian noise.

Parameters:

Name Type Description Default
n_samples int

Total number of samples. Default: 2000.

2000
noise float

Standard deviation of Gaussian noise added. Default: 0.05.

0.05
device Optional[Union[str, torch.device]]

Device for the tensor.

None
dtype torch.dtype

Data type for the tensor. Default: torch.float32.

torch.float32
seed Optional[int]

Random seed for reproducibility.

None
Source code in torchebm/datasets/generators.py
class TwoMoonsDataset(BaseSyntheticDataset):
    """
    Generates the 'two moons' dataset.

    Creates two interleaving half-circles with added Gaussian noise. The
    first ``n_samples // 2`` rows belong to the outer (upper) moon and the
    remaining rows to the inner (lower) moon; when ``n_samples`` is odd the
    inner moon receives the extra point.

    Args:
        n_samples (int): Total number of samples. Default: 2000.
        noise (float): Standard deviation of Gaussian noise added. Default: 0.05.
        device (Optional[Union[str, torch.device]]): Device for the tensor.
        dtype (torch.dtype): Data type for the tensor. Default: torch.float32.
        seed (Optional[int]): Random seed for reproducibility.
    """

    def __init__(
        self,
        n_samples: int = 2000,
        noise: float = 0.05,
        device: Optional[Union[str, torch.device]] = None,
        dtype: torch.dtype = torch.float32,
        seed: Optional[int] = None,
    ):
        # `noise` is assigned before delegating to the base class.
        # NOTE(review): presumably the base __init__ triggers _generate_data(),
        # which reads self.noise -- confirm against BaseSyntheticDataset.
        self.noise = noise
        super().__init__(n_samples=n_samples, device=device, dtype=dtype, seed=seed)

    def _generate_data(self) -> torch.Tensor:
        """
        Build the noisy two-moons point cloud.

        Returns:
            torch.Tensor: CPU ``float32`` tensor of shape ``(n_samples, 2)``
            holding the outer-moon points followed by the inner-moon points.
            NOTE(review): the final device/dtype conversion is expected to be
            done by the base class -- confirm against BaseSyntheticDataset.
        """
        n_samples_out = self.n_samples // 2
        n_samples_in = self.n_samples - n_samples_out

        # One angular grid per moon, reused for both coordinates.
        theta_out = np.linspace(0, np.pi, n_samples_out)
        theta_in = np.linspace(0, np.pi, n_samples_in)

        # Outer moon: upper unit half-circle centred on the origin.
        outer_circ_x = np.cos(theta_out)
        outer_circ_y = np.sin(theta_out)
        # Inner moon: the same arc flipped, shifted right by 1 and down by 0.5
        # so that the two arcs interleave.
        inner_circ_x = 1 - np.cos(theta_in)
        inner_circ_y = 1 - np.sin(theta_in) - 0.5

        X = np.vstack(
            [
                np.append(outer_circ_x, inner_circ_x),
                np.append(outer_circ_y, inner_circ_y),
            ]
        ).T.astype(np.float32)

        # Noise is drawn with torch (global torch RNG, on CPU) rather than
        # numpy, so it is added after the array has been wrapped as a tensor.
        tensor_data = torch.from_numpy(X)
        tensor_data += torch.randn_like(tensor_data) * self.noise

        return tensor_data

noise instance-attribute

noise = noise

_generate_data

_generate_data() -> np.ndarray
Source code in torchebm/datasets/generators.py
def _generate_data(self) -> torch.Tensor:
    """
    Build the noisy two-moons point cloud.

    Reads ``self.n_samples`` (total number of points) and ``self.noise``
    (standard deviation of the Gaussian noise). The first
    ``n_samples // 2`` rows form the outer moon and the remaining rows the
    inner moon.

    Returns:
        torch.Tensor: CPU ``float32`` tensor of shape ``(n_samples, 2)``.
        NOTE(review): the final device/dtype conversion is expected to be
        done by the base class -- confirm against BaseSyntheticDataset.
    """
    n_samples_out = self.n_samples // 2
    n_samples_in = self.n_samples - n_samples_out

    # One angular grid per moon, reused for both coordinates.
    theta_out = np.linspace(0, np.pi, n_samples_out)
    theta_in = np.linspace(0, np.pi, n_samples_in)

    # Outer moon: upper unit half-circle centred on the origin.
    outer_circ_x = np.cos(theta_out)
    outer_circ_y = np.sin(theta_out)
    # Inner moon: the same arc flipped, shifted right by 1 and down by 0.5
    # so that the two arcs interleave.
    inner_circ_x = 1 - np.cos(theta_in)
    inner_circ_y = 1 - np.sin(theta_in) - 0.5

    X = np.vstack(
        [
            np.append(outer_circ_x, inner_circ_x),
            np.append(outer_circ_y, inner_circ_y),
        ]
    ).T.astype(np.float32)

    # Noise is drawn with torch (global torch RNG, on CPU) rather than
    # numpy, so it is added after the array has been wrapped as a tensor.
    tensor_data = torch.from_numpy(X)
    tensor_data += torch.randn_like(tensor_data) * self.noise

    return tensor_data