"""
Copyright (c) 2024 Lukas Koch. All rights reserved.
Statistical distributions that are useful, but not available in
``scipy.stats``.
"""
from __future__ import annotations
from typing import Any, Iterable
import numpy as np
from numpy.typing import ArrayLike, NDArray
from scipy.stats import chi, chi2, rv_continuous
from ._fmax import TestStatistic
# We need to define methods with the new distributions' parameters
# pylint: disable=arguments-differ
# We need to inherit from non-typed scipy
# mypy: disable-error-code="misc, no-any-return"
[docs]
class Bee(rv_continuous):
"""A random variable representing the maximum of `df` chi distributions.
Each :any:`chi <scipy.stats.chi>` disitribution has ``df = 1``.
.. note::
You probably do not need to use this class directly. Instead work with
the instance :data:`bee`.
Parameters
----------
df : int
The number of chi-distirbuted variables to take the maximum of.
"""
def _cdf(self, x: ArrayLike, df: int) -> ArrayLike:
return chi.cdf(x, df=1) ** df
#: Use this instance of :class:`Bee`
bee = Bee(name="bee", a=0)
[docs]
class Bee2(rv_continuous):
"""A random variable representing the maximum of `df` chi2 distributions.
Each :any:`chii2 <scipy.stats.chi2>` disitribution has ``df = 1``.
.. note::
You probably do not need to use this class directly. Instead work with
the instance :data:`bee2`.
Parameters
----------
df : int
The number of chi-distirbuted variables to take the maximum of.
Notes
-----
This distribution is discussed in [1]_ in the context of robust test
statistics.
References
----------
.. [1] L. Koch, "Robust test statistics for data sets with missing
correlation information," Phys. Rev. D 103, 113008 (2021) , Vol. 103, No.
11 p. 113008, https://arxiv.org/abs/2102.06172
"""
def _cdf(self, x: ArrayLike, df: int) -> ArrayLike:
return chi2.cdf(x, df=1) ** df
#: Use this instance of :class:`Bee2`
bee2 = Bee2(name="bee2", a=0)
[docs]
class DF:
"""Helper class to get around limitations of `rv_continuous`.
This class represents the shape parameter for the :class:`Cee` and
:class:`Cee2` distributions.
Parameters
----------
*args : tuple of int
The `df` for each chi(2) distribution
"""
def __init__(self, *args: tuple[int]) -> None:
self.df = args
def __gt__(self, other: Any) -> bool:
return True
[docs]
class Cee(rv_continuous):
"""A random variable representing the maximum of multiple chi distributions.
Each :any:`chi <scipy.stats.chi>` disitribution can have a different ``df``. If
all ``df`` are equal to 1, this is identical to the :class:`Bee`
distribution with ``df = len(k)``.
.. note::
You probably do not need to use this class directly. Instead work with
the instance :data:`cee`.
Parameters
----------
k : DF or Iterable of DF
Special class to pass variable length list of degrees of freedom of the
chi-distirbuted variables to take the maximum of.
Examples
--------
>>> cee(k=DF(1,2,3)).pdf(1)
0.2501359390297275
"""
def _cdf(self, x: NDArray[Any], k: Iterable[DF]) -> NDArray[Any]:
# Translate each DF object into df tuple
dof = [_k.df for _k in k]
# Calculate and return cdf for each
return np.array([np.prod([chi.cdf(x, df=n) for n in _k], axis=0) for _k in dof])
#: Use this instance of :class:`Cee`
cee = Cee(name="cee", a=0)
[docs]
class Cee2(rv_continuous):
"""A random variable representing the maximum of multiple chi2 distributions.
Each :any:`chi2 <scipy.stats.chi2>` disitribution can have a different ``df``. If
all ``df`` are equal to 1, this is identical to a :class:`Bee2`
disritbution with ``df = len(k)``.
.. note::
You probably do not need to use this class directly. Instead work with
the instance :data:`cee2`.
Parameters
----------
k : DF or Iterable of DF
Special class to pass variable length list of degrees of freedom of the
chi-distirbuted variables to take the maximum of.
Examples
--------
>>> cee2(k=DF(1,2,3)).pdf(1)
0.12506796951321578
Notes
-----
TODO: Add reference to paper.
"""
def _cdf(self, x: NDArray[Any], k: Iterable[DF]) -> NDArray[Any]:
# Translate each DF object into df tuple
dof = [_k.df for _k in k]
# Calculate and return cdf for each
return np.array(
[np.prod([chi2.cdf(x, df=n) for n in _k], axis=0) for _k in dof]
)
#: Use this instance of :class:`Cee2`
cee2 = Cee2(
name="cee2",
a=0,
)
[docs]
class RVTestStatistic(rv_continuous):
"""A random variable distributed as the expectation of a :class:`TestStatistic`.
.. note::
You probably do not need to use this class directly. Instead work with
the instance :data:`rvteststatistic`.
Parameters
----------
statistic : TestStatistic or Iterable of TestStatistic
Examples
--------
>>> ts = OptimalFMaxStatistic(k=[1,2,3])
>>> rvteststatistic(statistic=ts).pdf(1)
0.24601379637056994
"""
def _cdf(self, x: NDArray[Any], statistic: Iterable[TestStatistic]) -> NDArray[Any]:
# Calculate and return cdf for each statistic
return np.array([s.cdf(x) for s in statistic])
#: Use this instance of :class:`RVTestStatistic`
rvteststatistic = RVTestStatistic(
name="rvteststatistic",
)
__all__ = ["DF"]
# Export all distributions and their instances
_g = dict(globals().items())
for _s, _x in _g.items():
# Only look at distribution classes
if isinstance(_x, type) and issubclass(_x, rv_continuous) and _s != "rv_continuous":
# Include the class
__all__.append(_s)
# And the instance
__all__.append(_s.lower())