Source code for permanova.experiments.exp_utils

from ..permanova_np.lightpermanova import LightPERMANOVA
from ..permanova_torch.lightpermanova import LightPERMANOVA as LightPERMANOVATorch
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch



[docs]
class NumpyTest:
    """
    class to perform tabular tests with the NumPy implementation of LightPERMANOVA
    """


[docs]
    @staticmethod
    def plot_distributions(sample_1: np.ndarray, sample_2: np.ndarray):
        """
        Plots the comparison of the distributions for each variable in the samples.

        Args:
            sample_1 (np.ndarray): original sample
            sample_2 (np.ndarray): sample with noise
        """
        for i in range(sample_1.shape[1]):
            sns.histplot(sample_1[:, i], label="original")
            sns.histplot(sample_2[:, i], label="new")
            plt.title(f"Difference in var {i}")
            plt.legend()
            plt.show()



[docs]
    @staticmethod
    def generate_samples_with_normal_covariates(
        dim: int, noise_mean: float, noise_std: float, size: int
    ) -> tuple:
        """
        Generates samples with normally distributed covariates

        Args:
            dim (int): number of covariates that describe the samples
            noise_mean (float): mean of the noise applied to each variable of the original sample
            noise_std (float): std of the noise applied to each variable of the original sample
            size (int): size of the samples

        Returns:
            tuple: (sample_1: np.ndarray, sample_2: np.ndarray)
        """
        np.random.seed(6)
        mean1 = np.arange(dim)
        A = np.random.randn(dim, dim)
        covariance_matrix1 = np.dot(A, A.T)

        sample_1 = np.random.multivariate_normal(mean1, covariance_matrix1, size)

        noise = np.random.normal(loc=noise_mean, scale=noise_std, size=sample_1.shape)
        sample_2 = sample_1 + noise

        return sample_1, sample_2



[docs]
    @staticmethod
    def generate_samples_with_mixture_covariates(
        dim: int, noise_mean_factor: float, noise_std_factor: float, size: int
    ) -> tuple:
        """
        Generates samples with covariates drawn from mixture of two gaussians

        Args:
            dim (int): number of covariates that describe the samples
            noise_mean_factor (float): to each of the gaussians of sample_2 will be applied a noise with mean = noise_mean_factor * original mean of the gaussian
            noise_std_factor (float): to each of the gaussians of sample_2 will be applied a noise with std = noise_std_factor * original std of the gaussian
            size (int): size of the samples

        Returns:
            tuple: (sample_1: np.ndarray, sample_2: np.ndarray)
        """
        np.random.seed(6)
        sample_1_list = []
        sample_2_list = []
        for _ in range(dim):
            mean1 = np.random.randint(5)
            std1 = np.abs(np.random.randn())
            mean2 = np.random.randint(5)
            std2 = np.abs(np.random.randn())
            # Generate random samples from a uniform distribution to decide which Gaussian to sample from
            mix = np.random.rand(size)

            # Sample from the first Gaussian where mix is less than weight1
            s1 = np.random.normal(loc=mean1, scale=std1, size=size)
            s2 = np.random.normal(loc=mean2, scale=std2, size=size)
            noise_1 = np.random.normal(
                loc=(mean1 * noise_mean_factor),
                scale=(std1 * noise_std_factor),
                size=size,
            )
            noise_2 = np.random.normal(
                loc=(mean2 * noise_mean_factor),
                scale=(std2 * noise_std_factor),
                size=size,
            )
            var_1 = np.where(
                mix < 0.5,
                s1,
                s2,
            )

            var_2 = np.where(
                mix < 0.5,
                s1 + noise_1,
                s2 + noise_2,
            )

            sample_1_list.append(var_1)
            sample_2_list.append(var_2)
        sample_1 = np.stack(sample_1_list, axis=1)
        sample_2 = np.stack(sample_2_list, axis=1)

        return sample_1, sample_2



[docs]
    @staticmethod
    def test(sample_1, sample_2, show_plots: bool = False):
        if show_plots:
            NumpyTest.plot_distributions(sample_1, sample_2)

        print("Computing permutations...")
        permanova = LightPERMANOVA(sample_1, compress=True)
        pvalue = permanova.run_simulation(sample_2, tot_permutations=10000)
        print(f"p-value: {round(pvalue,5)}")





[docs]
class TorchTest:
    """
    class to perform tabular tests with the PyTorch implementation of LightPERMANOVA
    """


[docs]
    @staticmethod
    def plot_distributions(sample_1: torch.Tensor, sample_2: torch.Tensor):
        """
        Plots the comparison of the distributions for each variable in the samples.

        Args:
            sample_1 (np.ndarray): original sample
            sample_2 (np.ndarray): sample with noise
        """
        for i in range(sample_1.shape[1]):
            sns.histplot(sample_1[:, i], label="original")
            sns.histplot(sample_2[:, i], label="new")
            plt.title(f"Difference in var {i}")
            plt.legend()
            plt.show()



[docs]
    @staticmethod
    def generate_samples_with_normal_covariates(
        dim: int, noise_mean: float, noise_std: float, size: int
    ):
        """
        Generates samples with normally distributed covariates

        Args:
            dim (int): number of covariates that describe the samples
            noise_mean (float): mean of the noise applied to each variable of the original sample
            noise_std (float): std of the noise applied to each variable of the original sample
            size (int): size of the samples

        Returns:
            tuple: (sample_1: np.ndarray, sample_2: np.ndarray)
        """
        torch.manual_seed(6)
        mean1 = torch.arange(dim, dtype=torch.float)
        A = torch.randn((dim, dim), dtype=torch.float)
        covariance_matrix1 = torch.mm(A, A.T)

        distribution = torch.distributions.MultivariateNormal(mean1, covariance_matrix1)

        sample_1 = distribution.sample((size,))

        noise = torch.normal(noise_mean, noise_std, sample_1.shape)
        sample_2 = sample_1 + noise

        return sample_1, sample_2



[docs]
    @staticmethod
    def generate_samples_with_mixture_covariates(
        dim: int, noise_mean_factor: float, noise_std_factor: float, size: int
    ):
        """
        Generates samples with covariates drawn from mixture of two gaussians

        Args:
            dim (int): number of covariates that describe the samples
            noise_mean_factor (float): to each of the gaussians of sample_2 will be applied a noise with mean = noise_mean_factor * original mean of the gaussian
            noise_std_factor (float): to each of the gaussians of sample_2 will be applied a noise with std = noise_std_factor * original std of the gaussian
            size (int): size of the samples

        Returns:
            tuple: (sample_1: np.ndarray, sample_2: np.ndarray)
        """
        torch.manual_seed(6)
        sample_1 = torch.Tensor([])
        sample_2 = torch.Tensor([])
        for _ in range(dim):
            mean1 = torch.randint(low=0, high=5, size=(1,), dtype=torch.float)
            std1 = torch.abs(torch.randn((1,)))
            mean2 = torch.randint(low=0, high=5, size=(1,), dtype=torch.float)
            std2 = torch.abs(torch.randn((1,)))
            # Generate random samples from a uniform distribution to decide which Gaussian to sample from
            mix = torch.rand((size,))

            # Sample from the first Gaussian where mix is less than weight1
            s1 = torch.normal(mean1.item(), std1.item(), (size,))
            s2 = torch.normal(mean2.item(), std2.item(), (size,))
            noise_1 = torch.normal(
                (mean1.item() * noise_mean_factor),
                (std1.item() * noise_std_factor),
                (size,),
            )
            noise_2 = torch.normal(
                (mean2.item() * noise_mean_factor),
                (std2.item() * noise_std_factor),
                (size,),
            )

            var_1 = torch.where(
                mix < 0.5,
                s1,
                s2,
            )

            var_2 = torch.where(
                mix < 0.5,
                s1 + noise_1,
                s2 + noise_2,
            )

            sample_1 = torch.cat((sample_1, var_1[:, None]), dim=-1)
            sample_2 = torch.cat((sample_2, var_2[:, None]), dim=-1)

        return sample_1, sample_2



[docs]
    @staticmethod
    def test(sample_1: torch.Tensor, sample_2: torch.Tensor, show_plots: bool = False):
        if show_plots:
            TorchTest.plot_distributions(sample_1, sample_2)

        print("Computing permutations...")
        permanova = LightPERMANOVATorch(sample_1, compress=True)
        pvalue = permanova.run_simulation(sample_2, tot_permutations=10000)
        print(f"p-value: {round(pvalue.item(),5)}")