class TwoMoonsDataset(BaseSyntheticDataset):
    """
    Generates the 'two moons' dataset: two interleaving half circles in 2-D.

    The "outer" moon is the upper unit half circle ``(cos t, sin t)`` and the
    "inner" moon is the shifted lower half circle ``(1 - cos t, 0.5 - sin t)``,
    with ``t`` evenly spaced over ``[0, pi]``. The outer moon receives
    ``n_samples // 2`` points and the inner moon receives the remainder, so an
    odd ``n_samples`` puts the extra point on the inner moon. Gaussian noise
    scaled by ``noise`` is then added to every coordinate.

    Args:
        n_samples (int): The total number of samples.
        noise (float): The standard deviation of the Gaussian noise to add.
        device (Optional[Union[str, torch.device]]): The device for the tensor.
        dtype (torch.dtype): The data type for the tensor.
        seed (Optional[int]): A random seed for reproducibility.
    """

    def __init__(
        self,
        n_samples: int = 2000,
        noise: float = 0.05,
        device: Optional[Union[str, torch.device]] = None,
        dtype: torch.dtype = torch.float32,
        seed: Optional[int] = None,
    ):
        # Set before delegating to the base class: presumably the base
        # __init__ triggers _generate_data(), which reads self.noise
        # (TODO confirm against BaseSyntheticDataset).
        self.noise = noise
        super().__init__(n_samples=n_samples, device=device, dtype=dtype, seed=seed)

    def _generate_data(self) -> torch.Tensor:
        """
        Build the noisy two-moons point cloud.

        Returns:
            torch.Tensor: A float32 CPU tensor of shape ``(n_samples, 2)``.
            Rows for the outer moon come first, followed by the inner moon.
            Moving the result to the requested device/dtype is left to the
            base class.
        """
        # Same split as the classic make_moons recipe: floor half on the
        # outer arc, the rest on the inner arc.
        n_samples_out = self.n_samples // 2
        n_samples_in = self.n_samples - n_samples_out

        # Compute each angle grid once and reuse it for both coordinates.
        theta_out = np.linspace(0, np.pi, n_samples_out)
        theta_in = np.linspace(0, np.pi, n_samples_in)

        outer_circ_x = np.cos(theta_out)
        outer_circ_y = np.sin(theta_out)
        inner_circ_x = 1 - np.cos(theta_in)
        inner_circ_y = 1 - np.sin(theta_in) - 0.5

        # Stack as (2, n) rows of x and y, then transpose to (n, 2) points.
        X = np.vstack(
            [
                np.concatenate([outer_circ_x, inner_circ_x]),
                np.concatenate([outer_circ_y, inner_circ_y]),
            ]
        ).T.astype(np.float32)

        # Noise is drawn with torch (on CPU) rather than numpy, so it comes
        # from torch's default generator.
        # NOTE(review): reproducibility via `seed` relies on the base class
        # seeding torch before this method runs - confirm.
        tensor_data = torch.from_numpy(X)
        noise_val = torch.randn_like(tensor_data) * self.noise
        tensor_data += noise_val

        return tensor_data