"""
This module contains different statistical methods used to compare
the observed and the simulated scores
"""
import numpy as np
from scipy import stats
class ArithmeticMean(object):
    """Arithmetic mean statistic for observed and simulated scores."""

    @staticmethod
    def calc(values):
        """
        Computes the arithmetic mean

        Args:
            values (:obj:`list`, :obj:`~numpy.array`): array of values

        Returns:
            float: mean value

        """
        return np.mean(values)

    @staticmethod
    def calc_observed(values, observed):
        """
        Measure how many times the mean of the values is higher than the mean of the observed values

        Args:
            values (:obj:`~numpy.array`): m x n matrix with scores (m: number of randomizations; n: number of mutations)
            observed (list, :obj:`~numpy.array`): n size vector with the observed scores (n: number of mutations)

        Returns:
            tuple: the number of times that the mean value of a randomization is greater than or equal to the
            mean observed value (as :obj:`int`) and the number of times that the mean value of a randomization
            is lower than or equal to the mean observed value (as :obj:`int`).

        """
        observed_value = np.mean(observed)
        # One mean per randomization (row of the matrix).
        simulated = np.mean(values, axis=1)
        # Ties are counted on both sides, so the two counts may add up to more than m.
        obs = int(np.count_nonzero(simulated >= observed_value))
        neg_obs = int(np.count_nonzero(simulated <= observed_value))
        return obs, neg_obs
class Maximum(object):
    """Maximum statistic for observed and simulated scores."""

    @staticmethod
    def calc(values):
        """
        Computes the maximum

        Args:
            values (:obj:`list`, :obj:`~numpy.array`): array of values

        Returns:
            float: maximum value

        """
        return np.max(values)

    @staticmethod
    def calc_observed(values, observed):
        """
        Measure how many times the maximum of the values is higher than the maximum of the observed values

        Args:
            values (:obj:`~numpy.array`): two-dimensional matrix with scores; the maximum is taken along axis 1
                (one maximum per row)
            observed (list, :obj:`~numpy.array`): vector with the observed scores

        Returns:
            tuple: the number of rows whose maximum is greater than or equal to the maximum observed value
            (as :obj:`int`) and the number of rows whose maximum is lower than or equal to the maximum
            observed value (as :obj:`int`).

        """
        observed_value = np.max(observed)
        # One maximum per row of the matrix.
        simulated = np.max(values, axis=1)
        # Ties are counted on both sides.
        obs = int(np.count_nonzero(simulated >= observed_value))
        neg_obs = int(np.count_nonzero(simulated <= observed_value))
        return obs, neg_obs
class GeometricMean(object):
    r"""
    The geometric mean used is not the standard.

    .. math::

        (\prod \limits_{i=1}^n (x_i+1))^{1/n}-1 &= \sqrt[n]{(x_1+1)(x_2+1) \cdots (x_n+1)} -1

    """

    @staticmethod
    def calc(values):
        """
        Computes the geometric mean of a set of values.

        Args:
            values (:obj:`list`, :obj:`~numpy.array`): set of values

        Returns:
            (float): geometric mean

            (array): geometric mean by columns (if the input is a matrix)

        """
        # Shift by one before and after so that zero scores do not collapse the product.
        return stats.gmean(np.asarray(values) + 1.0) - 1.0

    @staticmethod
    def calc_observed(values, observed):
        """
        Measure how many times the geometric mean of the values is higher than the geometric mean of the observed values

        Args:
            values (:obj:`~numpy.array`): m x n matrix with scores (m: number of randomizations; n: number of mutations)
            observed (list, :obj:`~numpy.array`): n size vector with the observed scores (n: number of mutations)

        Returns:
            tuple: the number of times that the mean value of a randomization is greater than or equal to the
            mean observed value (as :obj:`int`) and the number of times that the mean value of a randomization
            is lower than or equal to the mean observed value (as :obj:`int`).

        """
        # Convert to arrays first: ``list + 1.0`` raises TypeError, and lists are accepted inputs.
        observed_value = stats.gmean(np.asarray(observed) + 1.0) - 1.0
        # One shifted geometric mean per randomization (row of the matrix).
        simulated = stats.gmean(np.asarray(values) + 1.0, axis=1) - 1.0
        # Ties are counted on both sides.
        obs = int(np.count_nonzero(simulated >= observed_value))
        neg_obs = int(np.count_nonzero(simulated <= observed_value))
        return obs, neg_obs
class ArithmeticMeanHeteroscedasticScores(object):
    """
    Arithmetic mean computed on standardized scores.

    Scores are centered and scaled with the mean and the standard deviation of the
    per-row means of the simulated values before the arithmetic means are compared.
    """

    @staticmethod
    def calc_observed(values, observed):
        """
        Measure how many times the mean of the standardized values is higher than the mean of the
        standardized observed values

        Args:
            values (:obj:`~numpy.array`): two-dimensional matrix with scores; means are taken along axis 1
                (one mean per row)
            observed (list, :obj:`~numpy.array`): vector with the observed scores

        Returns:
            tuple: the number of rows whose standardized mean is greater than or equal to the standardized
            mean observed value (as :obj:`int`) and the number of rows whose standardized mean is lower than
            or equal to the standardized mean observed value (as :obj:`int`).

        """
        values = np.asarray(values)
        observed = np.asarray(observed)

        # Location and scale are estimated from the per-row means of the simulated scores.
        means = np.mean(values, axis=1)
        mean_score = np.mean(means)
        std_dev = np.std(means)

        observed = observed - mean_score
        values = values - mean_score

        # When every row has the same mean the scale is zero; dividing would turn all
        # scores into nan/inf and make both counts meaningless, so only center then.
        if std_dev > 0:
            observed = observed / std_dev
            values = values / std_dev

        observed_value = np.mean(observed)
        simulated = np.mean(values, axis=1)
        # Ties are counted on both sides.
        obs = int(np.count_nonzero(simulated >= observed_value))
        neg_obs = int(np.count_nonzero(simulated <= observed_value))
        return obs, neg_obs
# Registry of the available statistics: maps each short name to an instance
# of the class implementing it.
STATISTIC_TESTS = {
    'amean': ArithmeticMean(),
    'max': Maximum(),
    'gmean': GeometricMean(),
    'amean_scoresmodif': ArithmeticMeanHeteroscedasticScores(),
}