Probability Mass Function (Pmf)¶

A Pmf object represents a mapping from quantities to their probabilities.

Pmf is a subclass of a Pandas Series, so it has all Series methods, although some are overridden to change their behavior.

Bases: Distribution

Represents a probability mass function (PMF).

Source code in empiricaldist/empiricaldist.py

class Pmf(Distribution):
    """Represents a probability mass function (PMF).

    The methods below read the quantities through `self.qs` and the
    corresponding probabilities through `self.ps`.
    """

    def copy(self, deep=True):
        """Make a copy.

        Args:
            deep: passed as the `copy` argument of the Pmf constructor

        Returns: new Pmf
        """
        return Pmf(self, copy=deep)

    def make_pmf(self, **kwargs):
        """Make a Pmf from the Pmf.

        Args:
            kwargs: if any are given, they are passed to the Pmf
                    constructor and a new Pmf is returned

        Returns: Pmf (self itself, not a copy, when no kwargs are given)
        """
        if kwargs:
            return Pmf(self, **kwargs)
        return self

    # Pmf overrides the arithmetic operations in order
    # to provide fill_value=0 and return a Pmf.

    def add(self, x, **kwargs):
        """Override add to default fill_value to 0.

        Args:
            x: Distribution, sequence, array, or scalar
            kwargs: passed to Series.add

        Returns: Pmf
        """
        underride(kwargs, fill_value=0)
        s = pd.Series(self, copy=False).add(x, **kwargs)
        return Pmf(s)

    __add__ = add
    __radd__ = add

    def sub(self, x, **kwargs):
        """Override the - operator to default fill_value to 0.

        Args:
            x: Distribution, sequence, array, or scalar
            kwargs: passed to Series.sub

        Returns:  Pmf
        """
        underride(kwargs, fill_value=0)
        s = pd.Series.sub(self, x, **kwargs)
        return Pmf(s)

    subtract = sub
    __sub__ = sub

    def __rsub__(self, x, **kwargs):
        """Handle reverse subtraction, `x - self`.

        Delegates to Series.rsub so that a scalar, sequence, or array on
        the left-hand side is handled the same way `sub` handles it on the
        right-hand side, instead of first being wrapped in a Pmf.

        Args:
            x: Distribution, sequence, array, or scalar
            kwargs: passed to Series.rsub (fill_value defaults to 0)

        Returns: Pmf
        """
        underride(kwargs, fill_value=0)
        s = pd.Series(self, copy=False).rsub(x, **kwargs)
        return Pmf(s)

    def mul(self, x, **kwargs):
        """Override the * operator to default fill_value to 0.

        Args:
            x: Distribution, sequence, array, or scalar
            kwargs: passed to Series.mul

        Returns:  Pmf
        """
        underride(kwargs, fill_value=0)
        s = pd.Series(self, copy=False).mul(x, **kwargs)
        return Pmf(s)

    multiply = mul
    __mul__ = mul
    __rmul__ = mul

    def div(self, x, **kwargs):
        """Override the / operator to default fill_value to 0.

        Args:
            x: Distribution, sequence, array, or scalar
            kwargs: passed to Series.div

        Returns:  Pmf
        """
        underride(kwargs, fill_value=0)
        s = pd.Series(self, copy=False).div(x, **kwargs)
        return Pmf(s)

    divide = div
    __truediv__ = div

    def __rtruediv__(self, x, **kwargs):
        """Handle reverse division, `x / self`.

        Delegates to Series.rtruediv so that a scalar, sequence, or array
        numerator works as well as a Distribution.

        Args:
            x: Distribution, sequence, array, or scalar
            kwargs: passed to Series.rtruediv (fill_value defaults to 0)

        Returns:
            Pmf
        """
        underride(kwargs, fill_value=0)
        s = pd.Series(self, copy=False).rtruediv(x, **kwargs)
        return Pmf(s)

    def normalize(self):
        """Make the probabilities add up to 1 (modifies self).

        Returns: float, normalizing constant (the sum before dividing)
        """
        total = self.sum()
        self /= total
        return total

    def mean(self):
        """Computes expected value.

        Returns: float

        Raises:
            ValueError: if the probabilities do not sum to 1, or if
                        self.dtype is not numeric
        """
        if not np.allclose(1, self.sum()):
            raise ValueError("Pmf must be normalized before computing mean")

        # NOTE(review): self.dtype looks like the dtype of the stored
        # probabilities, not of the quantities -- confirm which one this
        # check was meant to guard.
        if not pd.api.types.is_numeric_dtype(self.dtype):
            raise ValueError("mean is only defined for numeric data")

        return np.sum(self.ps * self.qs)

    def var(self):
        """Variance of a PMF.

        Returns: float

        Raises:
            ValueError: propagated from `mean`
        """
        m = self.mean()
        d = self.qs - m
        return np.sum(d**2 * self.ps)

    def std(self):
        """Standard deviation of a PMF.

        Returns: float
        """
        return np.sqrt(self.var())

    def mode(self, **kwargs):
        """Most common value.

        If multiple quantities have the maximum probability,
        the first maximal quantity is returned.

        Args:
            kwargs: passed to Series.idxmax (skipna defaults to True)

        Returns: type of the quantities
        """
        underride(kwargs, skipna=True)
        return self.idxmax(**kwargs)

    max_prob = mode

    def choice(self, size=1, **kwargs):
        """Makes a random selection.

        Uses the probabilities as weights unless `p` is provided.

        Args:
            size: number of values or tuple of dimensions
            kwargs: passed to np.random.choice

        Returns: NumPy array
        """
        underride(kwargs, p=self.ps)
        return np.random.choice(self.qs, size, **kwargs)

    def add_dist(self, x):
        """Computes the Pmf of the sum of values drawn from self and x.

        Args:
            x: Distribution, scalar, or sequence

        Returns: new Pmf
        """
        if isinstance(x, Distribution):
            return self.convolve_dist(x, np.add.outer)
        else:
            # not a Distribution: shift the quantities, keep the probabilities
            return Pmf(self.ps.copy(), index=self.qs + x)

    def sub_dist(self, x):
        """Computes the Pmf of the diff of values drawn from self and x.

        Args:
            x: Distribution, scalar, or sequence

        Returns: new Pmf
        """
        if isinstance(x, Distribution):
            return self.convolve_dist(x, np.subtract.outer)
        else:
            return Pmf(self.ps.copy(), index=self.qs - x)

    def mul_dist(self, x):
        """Computes the Pmf of the product of values drawn from self and x.

        Args:
            x: Distribution, scalar, or sequence

        Returns: new Pmf
        """
        if isinstance(x, Distribution):
            return self.convolve_dist(x, np.multiply.outer)
        else:
            return Pmf(self.ps.copy(), index=self.qs * x)

    def div_dist(self, x):
        """Computes the Pmf of the ratio of values drawn from self and x.

        Args:
            x: Distribution, scalar, or sequence

        Returns: new Pmf
        """
        if isinstance(x, Distribution):
            return self.convolve_dist(x, np.divide.outer)
        else:
            return Pmf(self.ps.copy(), index=self.qs / x)

    def convolve_dist(self, dist, ufunc):
        """Convolve two distributions.

        Args:
            dist: Distribution
            ufunc: function called as ufunc(self.qs, dist.qs) to combine
                   every pair of quantities, e.g. np.add.outer

        Returns: new Pmf
        """
        dist = dist.make_pmf()
        qs = ufunc(self.qs, dist.qs).flatten()
        ps = np.multiply.outer(self.ps, dist.ps).flatten()
        # pairs that produce the same quantity have their probabilities summed
        series = pd.Series(ps).groupby(qs).sum()

        return Pmf(series)

    def gt_dist(self, x):
        """Probability that a value from self exceeds a value from x.

        Args:
            x: Distribution object or scalar

        Returns: float probability
        """
        if isinstance(x, Distribution):
            return self.pmf_outer(x, np.greater).sum()
        else:
            return self[self.qs > x].sum()

    def lt_dist(self, x):
        """Probability that a value from self is less than a value from x.

        Args:
            x: Distribution object or scalar

        Returns: float probability
        """
        if isinstance(x, Distribution):
            return self.pmf_outer(x, np.less).sum()
        else:
            return self[self.qs < x].sum()

    def ge_dist(self, x):
        """Probability that a value from self is >= a value from x.

        Args:
            x: Distribution object or scalar

        Returns: float probability
        """
        if isinstance(x, Distribution):
            return self.pmf_outer(x, np.greater_equal).sum()
        else:
            return self[self.qs >= x].sum()

    def le_dist(self, x):
        """Probability that a value from self is <= a value from x.

        Args:
            x: Distribution object or scalar

        Returns: float probability
        """
        if isinstance(x, Distribution):
            return self.pmf_outer(x, np.less_equal).sum()
        else:
            return self[self.qs <= x].sum()

    def eq_dist(self, x):
        """Probability that a value from self equals a value from x.

        Args:
            x: Distribution object or scalar

        Returns: float probability
        """
        if isinstance(x, Distribution):
            return self.pmf_outer(x, np.equal).sum()
        else:
            return self[self.qs == x].sum()

    def ne_dist(self, x):
        """Probability that a value from self is not equal to a value from x.

        Args:
            x: Distribution object or scalar

        Returns: float probability
        """
        if isinstance(x, Distribution):
            return self.pmf_outer(x, np.not_equal).sum()
        else:
            return self[self.qs != x].sum()

    def pmf_outer(self, dist, ufunc):
        """Computes the outer product of two PMFs, masked by a comparison.

        Args:
            dist: Distribution object
            ufunc: NumPy ufunc whose `outer` is applied to the quantities,
                   e.g. np.greater

        Returns: NumPy array, ufunc.outer of the quantities multiplied
                 elementwise by the outer product of the probabilities
        """
        dist = dist.make_pmf()
        qs = ufunc.outer(self.qs, dist.qs)
        ps = np.multiply.outer(self.ps, dist.ps)
        return qs * ps

    def make_joint(self, other, **options):
        """Make joint distribution (assuming independence).

        Args:
            other: Pmf
            options: passed to Pmf constructor

        Returns: new Pmf indexed by a MultiIndex of (self, other) quantities
        """
        qs = pd.MultiIndex.from_product([self.qs, other.qs])
        ps = np.multiply.outer(self.ps, other.ps).flatten()
        return Pmf(ps, index=qs, **options)

    def marginal(self, i, name=None):
        """Gets the marginal distribution of the indicated variable.

        Args:
            i: index of the variable we want
            name: string

        Returns: Pmf
        """
        # groupby(level=i).sum() replaces the deprecated self.sum(level=i)
        return Pmf(self.groupby(level=i).sum(), name=name)

    def conditional(self, i, val, name=None):
        """Gets the conditional distribution of the indicated variable.

        Args:
            i: index of the variable we're conditioning on
            val: the value the ith variable has to have
            name: string

        Returns: Pmf (normalized)
        """
        pmf = Pmf(self.xs(key=val, level=i), copy=True, name=name)
        pmf.normalize()
        return pmf

    def update(self, likelihood, data):
        """Bayesian update (modifies self).

        Args:
            likelihood: function that takes (data, hypo) and returns
                        likelihood of data under hypo, P(data|hypo)
            data: in whatever format likelihood understands

        Returns: normalizing constant
        """
        for hypo in self.qs:
            self[hypo] *= likelihood(data, hypo)

        return self.normalize()

    def make_cdf(self, **kwargs):
        """Make a Cdf from the Pmf.

        Args:
            kwargs: `normalize` (default False) is consumed here and, if
                    true, the Cdf is normalized; everything else is passed
                    to the Cdf constructor

        Returns: Cdf
        """
        normalize = kwargs.pop("normalize", False)

        pmf = self.sort_index()
        cumulative = np.cumsum(pmf)
        cdf = Cdf(cumulative, pmf.index.copy(), **kwargs)

        if normalize:
            cdf.normalize()

        return cdf

    def make_surv(self, **kwargs):
        """Make a Surv from the Pmf.

        Args:
            kwargs: passed to Cdf.make_surv

        Returns: Surv
        """
        cdf = self.make_cdf()
        return cdf.make_surv(**kwargs)

    def make_hazard(self, normalize=False, **kwargs):
        """Make a Hazard from the Pmf.

        Args:
            normalize: if True, self.normalize() is called after the
                       hazard has been computed
            kwargs: passed to the Hazard constructor

        Returns: Hazard
        """
        surv = self.make_surv()
        haz = Hazard(self / (self + surv), **kwargs)
        haz.total = getattr(surv, "total", 1.0)
        if normalize:
            # NOTE(review): this normalizes self, not the returned haz, and
            # only after haz was built -- confirm this is the intent.
            self.normalize()
        return haz

    def make_same(self, dist):
        """Convert the given dist to Pmf.

        Args:
            dist: Distribution

        Returns: Pmf
        """
        return dist.make_pmf()

    @staticmethod
    def from_seq(
        seq,
        normalize=True,
        sort=True,
        ascending=True,
        dropna=True,
        na_position="last",
        **options,
    ):
        """Make a PMF from a sequence of values.

        Args:
            seq: iterable
            normalize: whether to normalize the Pmf, default True
            sort: whether to sort the Pmf by values, default True
            ascending: whether to sort in ascending order, default True
            dropna: whether to drop NaN values, default True
            na_position: If 'first' puts NaNs at the beginning,
                        'last' puts NaNs at the end.
            options: passed to the Pmf constructor (`copy` is always
                     forced to False; `name` defaults to "")

        Returns: Pmf object
        """
        # compute the value counts
        series = pd.Series(seq).value_counts(
            normalize=normalize, sort=False, dropna=dropna
        )
        # make the result a Pmf
        # (since we just made a fresh Series, there is no reason to copy it)
        options["copy"] = False
        underride(options, name="")
        pmf = Pmf(series, **options)

        # sort in place, if desired
        if sort:
            pmf.sort_index(
                inplace=True, ascending=ascending, na_position=na_position
            )

        return pmf

`__rsub__(x, **kwargs)` ¶

Handle reverse subtraction operation.

Parameters:	`x` – Distribution, sequence, array, or scalar `kwargs` – passed to Series.sub

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def __rsub__(self, x, **kwargs):
    """Handle reverse subtraction, `x - self`.

    Delegates to Series.rsub so that a scalar, sequence, or array on the
    left-hand side is handled the same way `sub` handles it on the
    right-hand side, instead of first being wrapped in a Pmf.

    Args:
        x: Distribution, sequence, array, or scalar
        kwargs: passed to Series.rsub (fill_value defaults to 0)

    Returns: Pmf
    """
    underride(kwargs, fill_value=0)
    s = pd.Series(self, copy=False).rsub(x, **kwargs)
    return Pmf(s)

`__rtruediv__(x, **kwargs)` ¶

Handle reverse division operation.

Parameters:	`x` – Distribution, sequence, array, or scalar `kwargs` – passed to Series.div

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def __rtruediv__(self, x, **kwargs):
    """Handle reverse division, `x / self`.

    Delegates to Series.rtruediv so that a scalar, sequence, or array
    numerator works as well as a Distribution.

    Args:
        x: Distribution, sequence, array, or scalar
        kwargs: passed to Series.rtruediv (fill_value defaults to 0)

    Returns:
        Pmf
    """
    underride(kwargs, fill_value=0)
    s = pd.Series(self, copy=False).rtruediv(x, **kwargs)
    return Pmf(s)

`add(x, **kwargs)` ¶

Override add to default fill_value to 0.

Parameters:	`x` – Distribution, sequence, array, or scalar `kwargs` – passed to Series.add

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def add(self, x, **kwargs):
    """Add x to this Pmf, treating missing entries as 0 by default.

    Args:
        x: Distribution, sequence, array, or scalar
        kwargs: passed to Series.add (fill_value defaults to 0)

    Returns: Pmf
    """
    underride(kwargs, fill_value=0)
    # operate on a plain Series view, then wrap the result again
    plain = pd.Series(self, copy=False)
    summed = plain.add(x, **kwargs)
    return Pmf(summed)

`add_dist(x)` ¶

Computes the Pmf of the sum of values drawn from self and x.

Parameters:	`x` – Distribution, scalar, or sequence

Returns: new Pmf

Source code in empiricaldist/empiricaldist.py

def add_dist(self, x):
    """Distribution of the sum of a value from self and a value from x.

    Args:
        x: Distribution, scalar, or sequence

    Returns: new Pmf
    """
    if not isinstance(x, Distribution):
        # plain offset: same probabilities, shifted quantities
        probs = self.ps.copy()
        shifted = self.qs + x
        return Pmf(probs, index=shifted)

    return self.convolve_dist(x, np.add.outer)

`choice(size=1, **kwargs)` ¶

Makes a random selection.

Uses the probabilities as weights unless p is provided.

Parameters:	`size` – number of values or tuple of dimensions `kwargs` – passed to np.random.choice

Returns: NumPy array

Source code in empiricaldist/empiricaldist.py

def choice(self, size=1, **kwargs):
    """Draw random quantities from the distribution.

    The probabilities are used as weights unless the caller supplies `p`.

    Args:
        size: number of values or tuple of dimensions
        kwargs: passed to np.random.choice

    Returns: NumPy array
    """
    underride(kwargs, p=self.ps)
    outcomes = self.qs
    sample = np.random.choice(outcomes, size, **kwargs)
    return sample

`conditional(i, val, name=None)` ¶

Gets the conditional distribution of the indicated variable.

Parameters:	`i` – index of the variable we're conditioning on `val` – the value the ith variable has to have `name` – string

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def conditional(self, i, val, name=None):
    """Distribution of the remaining variables given that level i equals val.

    Args:
        i: index of the variable we're conditioning on
        val: the value the ith variable has to have
        name: string

    Returns: Pmf (normalized)
    """
    cross_section = self.xs(key=val, level=i)
    conditioned = Pmf(cross_section, copy=True, name=name)
    conditioned.normalize()
    return conditioned

`convolve_dist(dist, ufunc)` ¶

Convolve two distributions.

Parameters:	`dist` – Distribution `ufunc` – elementwise function for arrays

Returns: new Pmf

Source code in empiricaldist/empiricaldist.py

def convolve_dist(self, dist, ufunc):
    """Combine every pair of quantities from self and dist.

    Args:
        dist: Distribution
        ufunc: function called as ufunc(self.qs, dist.qs) to combine
               the quantities pairwise, e.g. np.add.outer

    Returns: new Pmf
    """
    other = dist.make_pmf()
    outcomes = ufunc(self.qs, other.qs).flatten()
    weights = np.multiply.outer(self.ps, other.ps).flatten()
    # identical outcomes are merged by summing their probabilities
    merged = pd.Series(weights).groupby(outcomes).sum()
    return Pmf(merged)

`copy(deep=True)` ¶

Make a copy.

Returns: new Pmf

Source code in empiricaldist/empiricaldist.py

def copy(self, deep=True):
    """Return a new Pmf built from this one.

    Args:
        deep: passed as the `copy` argument of the Pmf constructor

    Returns: new Pmf
    """
    duplicate = Pmf(self, copy=deep)
    return duplicate

`div(x, **kwargs)` ¶

Override the / operator to default fill_value to 0.

Parameters:	`x` – Distribution, sequence, array, or scalar `kwargs` – passed to Series.div

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def div(self, x, **kwargs):
    """Divide this Pmf by x, treating missing entries as 0 by default.

    Args:
        x: Distribution, sequence, array, or scalar
        kwargs: passed to Series.div (fill_value defaults to 0)

    Returns:  Pmf
    """
    underride(kwargs, fill_value=0)
    # operate on a plain Series view, then wrap the result again
    plain = pd.Series(self, copy=False)
    quotient = plain.div(x, **kwargs)
    return Pmf(quotient)

`div_dist(x)` ¶

Computes the Pmf of the ratio of values drawn from self and x.

Parameters:	`x` – Distribution, scalar, or sequence

Returns: new Pmf

Source code in empiricaldist/empiricaldist.py

def div_dist(self, x):
    """Distribution of the ratio of a value from self to a value from x.

    Args:
        x: Distribution, scalar, or sequence

    Returns: new Pmf
    """
    if not isinstance(x, Distribution):
        # plain divisor: same probabilities, rescaled quantities
        probs = self.ps.copy()
        scaled = self.qs / x
        return Pmf(probs, index=scaled)

    return self.convolve_dist(x, np.divide.outer)

`eq_dist(x)` ¶

Probability that a value from self equals a value from x.

Parameters:	`x` – Distribution object or scalar

Returns: float probability

Source code in empiricaldist/empiricaldist.py

def eq_dist(self, x):
    """Probability that a value drawn from self equals a value from x.

    Args:
        x: Distribution object or scalar

    Returns: float probability
    """
    if not isinstance(x, Distribution):
        matches = self.qs == x
        return self[matches].sum()

    joint = self.pmf_outer(x, np.equal)
    return joint.sum()

`from_seq(seq, normalize=True, sort=True, ascending=True, dropna=True, na_position='last', **options)` `staticmethod` ¶

Make a PMF from a sequence of values.

Parameters:	`seq` – iterable `normalize` – whether to normalize the Pmf, default True `sort` – whether to sort the Pmf by values, default True `ascending` – whether to sort in ascending order, default True `dropna` – whether to drop NaN values, default True `na_position` – If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.

`options` – passed to the Pmf constructor

Returns: Pmf object

Source code in empiricaldist/empiricaldist.py

@staticmethod
def from_seq(
    seq,
    normalize=True,
    sort=True,
    ascending=True,
    dropna=True,
    na_position="last",
    **options,
):
    """Build a Pmf by counting the values in a sequence.

    Args:
        seq: iterable
        normalize: whether to normalize the Pmf, default True
        sort: whether to sort the Pmf by values, default True
        ascending: whether to sort in ascending order, default True
        dropna: whether to drop NaN values, default True
        na_position: If 'first' puts NaNs at the beginning,
                    'last' puts NaNs at the end.
        options: passed to the Pmf constructor (`copy` is always
                 forced to False; `name` defaults to "")

    Returns: Pmf object
    """
    counts = pd.Series(seq).value_counts(
        normalize=normalize, sort=False, dropna=dropna
    )

    # counts is a freshly built Series, so wrapping it needs no copy
    options["copy"] = False
    underride(options, name="")
    result = Pmf(counts, **options)

    if not sort:
        return result

    result.sort_index(
        inplace=True, ascending=ascending, na_position=na_position
    )
    return result

`ge_dist(x)` ¶

Probability that a value from self is greater than or equal to a value from x.

Parameters:	`x` – Distribution object or scalar

Returns: float probability

Source code in empiricaldist/empiricaldist.py

def ge_dist(self, x):
    """Probability that a value from self is >= a value from x.

    Args:
        x: Distribution object or scalar

    Returns: float probability
    """
    if not isinstance(x, Distribution):
        at_least = self.qs >= x
        return self[at_least].sum()

    joint = self.pmf_outer(x, np.greater_equal)
    return joint.sum()

`gt_dist(x)` ¶

Probability that a value from self exceeds a value from x.

Parameters:	`x` – Distribution object or scalar

Returns: float probability

Source code in empiricaldist/empiricaldist.py

def gt_dist(self, x):
    """Probability that a value from self is greater than a value from x.

    Args:
        x: Distribution object or scalar

    Returns: float probability
    """
    if not isinstance(x, Distribution):
        above = self.qs > x
        return self[above].sum()

    joint = self.pmf_outer(x, np.greater)
    return joint.sum()

`le_dist(x)` ¶

Probability that a value from self is less than or equal to a value from x.

Parameters:	`x` – Distribution object or scalar

Returns: float probability

Source code in empiricaldist/empiricaldist.py

def le_dist(self, x):
    """Probability that a value from self is <= a value from x.

    Args:
        x: Distribution object or scalar

    Returns: float probability
    """
    if not isinstance(x, Distribution):
        at_most = self.qs <= x
        return self[at_most].sum()

    joint = self.pmf_outer(x, np.less_equal)
    return joint.sum()

`lt_dist(x)` ¶

Probability that a value from self is less than a value from x.

Parameters:	`x` – Distribution object or scalar

Returns: float probability

Source code in empiricaldist/empiricaldist.py

def lt_dist(self, x):
    """Probability that a value from self is below a value from x.

    Args:
        x: Distribution object or scalar

    Returns: float probability
    """
    if not isinstance(x, Distribution):
        below = self.qs < x
        return self[below].sum()

    joint = self.pmf_outer(x, np.less)
    return joint.sum()

`make_cdf(**kwargs)` ¶

Make a Cdf from the Pmf.

Parameters:	`kwargs` – passed to the pd.Series constructor

Returns: Cdf

Source code in empiricaldist/empiricaldist.py

def make_cdf(self, **kwargs):
    """Build a Cdf from the cumulative sum of the sorted Pmf.

    Args:
        kwargs: `normalize` (default False) is consumed here and, if
                true, the Cdf is normalized; everything else is passed
                to the Cdf constructor

    Returns: Cdf
    """
    should_normalize = kwargs.pop("normalize", False)

    ordered = self.sort_index()
    running_total = np.cumsum(ordered)
    result = Cdf(running_total, ordered.index.copy(), **kwargs)

    if should_normalize:
        result.normalize()
    return result

`make_hazard(normalize=False, **kwargs)` ¶

Make a Hazard from the Pmf.

Parameters:	`normalize` – if True, this Pmf (self) is normalized after the hazard function has been computed `kwargs` – passed to the Hazard constructor

Returns: Hazard

Source code in empiricaldist/empiricaldist.py

def make_hazard(self, normalize=False, **kwargs):
    """Build a Hazard as self / (self + survival function).

    Args:
        normalize: if True, self.normalize() is called after the
                   hazard has been computed
        kwargs: passed to the Hazard constructor

    Returns: Hazard
    """
    survival = self.make_surv()
    at_risk = self + survival
    hazard = Hazard(self / at_risk, **kwargs)
    hazard.total = getattr(survival, "total", 1.0)

    # NOTE(review): this normalizes self rather than the returned hazard,
    # and only after the hazard was built -- confirm this is the intent.
    if normalize:
        self.normalize()
    return hazard

`make_joint(other, **options)` ¶

Make joint distribution (assuming independence).

Parameters:	`other` – Pmf `options` – passed to Pmf constructor

Returns: new Pmf

Source code in empiricaldist/empiricaldist.py

def make_joint(self, other, **options):
    """Joint distribution of self and other, assuming independence.

    Args:
        other: Pmf
        options: passed to Pmf constructor

    Returns: new Pmf indexed by a MultiIndex of (self, other) quantities
    """
    pairs = pd.MultiIndex.from_product([self.qs, other.qs])
    products = np.multiply.outer(self.ps, other.ps)
    joint_probs = products.flatten()
    return Pmf(joint_probs, index=pairs, **options)

`make_pmf(**kwargs)` ¶

Make a Pmf from the Pmf.

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def make_pmf(self, **kwargs):
    """Return this object as a Pmf.

    Args:
        kwargs: if any are given, they are passed to the Pmf
                constructor and a new Pmf is returned

    Returns: Pmf (self itself, not a copy, when no kwargs are given)
    """
    if not kwargs:
        return self
    return Pmf(self, **kwargs)

`make_same(dist)` ¶

Convert the given dist to Pmf.

Parameters:	`dist` – Distribution

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def make_same(self, dist):
    """Convert another distribution to the same type as self (a Pmf).

    Args:
        dist: Distribution

    Returns: Pmf, the result of dist.make_pmf()
    """
    converted = dist.make_pmf()
    return converted

`make_surv(**kwargs)` ¶

Make a Surv from the Pmf.

Parameters:	`kwargs` – passed to Cdf.make_surv

Returns: Surv

Source code in empiricaldist/empiricaldist.py

def make_surv(self, **kwargs):
    """Build a Surv by way of the Cdf of this Pmf.

    Args:
        kwargs: passed to Cdf.make_surv

    Returns: Surv
    """
    return self.make_cdf().make_surv(**kwargs)

`marginal(i, name=None)` ¶

Gets the marginal distribution of the indicated variable.

Parameters:	`i` – index of the variable we want `name` – string

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def marginal(self, i, name=None):
    """Marginal distribution of one variable of a joint Pmf.

    Args:
        i: index of the variable we want
        name: string

    Returns: Pmf
    """
    # groupby(level=i).sum() replaces the deprecated self.sum(level=i)
    per_level = self.groupby(level=i)
    totals = per_level.sum()
    return Pmf(totals, name=name)

`mean()` ¶

Computes expected value.

Returns: float

Source code in empiricaldist/empiricaldist.py

def mean(self):
    """Expected value of the distribution.

    Returns: float

    Raises:
        ValueError: if the probabilities do not sum to 1, or if
                    self.dtype is not numeric
    """
    total = self.sum()
    if not np.allclose(1, total):
        raise ValueError("Pmf must be normalized before computing mean")

    numeric = pd.api.types.is_numeric_dtype(self.dtype)
    if not numeric:
        raise ValueError("mean is only defined for numeric data")

    weighted = self.ps * self.qs
    return np.sum(weighted)

`mode(**kwargs)` ¶

Most common value.

If multiple quantities have the maximum probability, the first maximal quantity is returned.

Parameters:	`kwargs` – passed to Series.idxmax

Returns: type of the quantities

Source code in empiricaldist/empiricaldist.py

def mode(self, **kwargs):
    """Quantity with the highest probability.

    When several quantities share the maximum probability,
    the first of them is returned.

    Args:
        kwargs: passed to Series.idxmax (skipna defaults to True)

    Returns: type of the quantities
    """
    underride(kwargs, skipna=True)
    most_likely = self.idxmax(**kwargs)
    return most_likely

`mul(x, **kwargs)` ¶

Override the * operator to default fill_value to 0.

Parameters:	`x` – Distribution, sequence, array, or scalar `kwargs` – passed to Series.mul

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def mul(self, x, **kwargs):
    """Multiply this Pmf by x, treating missing entries as 0 by default.

    Args:
        x: Distribution, sequence, array, or scalar
        kwargs: passed to Series.mul (fill_value defaults to 0)

    Returns:  Pmf
    """
    underride(kwargs, fill_value=0)
    # operate on a plain Series view, then wrap the result again
    plain = pd.Series(self, copy=False)
    product = plain.mul(x, **kwargs)
    return Pmf(product)

`mul_dist(x)` ¶

Computes the Pmf of the product of values drawn from self and x.

Parameters:	`x` – Distribution, scalar, or sequence

Returns: new Pmf

Source code in empiricaldist/empiricaldist.py

def mul_dist(self, x):
    """Distribution of the product of a value from self and one from x.

    Args:
        x: Distribution, scalar, or sequence

    Returns: new Pmf
    """
    if not isinstance(x, Distribution):
        # plain factor: same probabilities, rescaled quantities
        probs = self.ps.copy()
        scaled = self.qs * x
        return Pmf(probs, index=scaled)

    return self.convolve_dist(x, np.multiply.outer)

`ne_dist(x)` ¶

Probability that a value from self is not equal to a value from x.

Parameters:	`x` – Distribution object or scalar

Returns: float probability

Source code in empiricaldist/empiricaldist.py

def ne_dist(self, x):
    """Probability that a value from self is not equal to a value from x.

    Args:
        x: Distribution object or scalar

    Returns: float probability
    """
    if not isinstance(x, Distribution):
        differs = self.qs != x
        return self[differs].sum()

    joint = self.pmf_outer(x, np.not_equal)
    return joint.sum()

`normalize()` ¶

Make the probabilities add up to 1 (modifies self).

Returns: float, normalizing constant

Source code in empiricaldist/empiricaldist.py

def normalize(self):
    """Rescale the probabilities in place so that they sum to 1.

    Returns: float, normalizing constant (the sum before dividing)
    """
    norm_const = self.sum()
    # in-place division so the caller's object is modified
    self /= norm_const
    return norm_const

`pmf_outer(dist, ufunc)` ¶

Computes the outer product of two PMFs.

Parameters:	`dist` – Distribution object `ufunc` – function to apply to the quantities

Returns: NumPy array

Source code in empiricaldist/empiricaldist.py

def pmf_outer(self, dist, ufunc):
    """Outer product of two PMFs, masked by a comparison of quantities.

    Args:
        dist: Distribution object
        ufunc: NumPy ufunc whose `outer` is applied to the quantities,
               e.g. np.greater

    Returns: NumPy array, ufunc.outer of the quantities multiplied
             elementwise by the outer product of the probabilities
    """
    other = dist.make_pmf()
    mask = ufunc.outer(self.qs, other.qs)
    joint = np.multiply.outer(self.ps, other.ps)
    return mask * joint

`std()` ¶

Standard deviation of a PMF.

Returns: float

Source code in empiricaldist/empiricaldist.py

def std(self):
    """Standard deviation: the square root of the variance.

    Returns: float
    """
    variance = self.var()
    return np.sqrt(variance)

`sub(x, **kwargs)` ¶

Override the - operator to default fill_value to 0.

Parameters:	`x` – Distribution, sequence, array, or scalar `kwargs` – passed to Series.sub

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def sub(self, x, **kwargs):
    """Subtract x from this Pmf, treating missing entries as 0 by default.

    Args:
        x: Distribution, sequence, array, or scalar
        kwargs: passed to Series.sub (fill_value defaults to 0)

    Returns:  Pmf
    """
    underride(kwargs, fill_value=0)
    # call the Series implementation directly on self
    difference = pd.Series.sub(self, x, **kwargs)
    return Pmf(difference)

`sub_dist(x)` ¶

Computes the Pmf of the difference of values drawn from self and x.

Parameters:	`x` – Distribution, scalar, or sequence

Returns: new Pmf

Source code in empiricaldist/empiricaldist.py

def sub_dist(self, x):
    """Distribution of the difference of a value from self and one from x.

    Args:
        x: Distribution, scalar, or sequence

    Returns: new Pmf
    """
    if not isinstance(x, Distribution):
        # plain offset: same probabilities, shifted quantities
        probs = self.ps.copy()
        shifted = self.qs - x
        return Pmf(probs, index=shifted)

    return self.convolve_dist(x, np.subtract.outer)

`update(likelihood, data)` ¶

Bayesian update.

Parameters:	`likelihood` – function that takes (data, hypo) and returns likelihood of data under hypo, P(data\|hypo) `data` – in whatever format likelihood understands

Returns: normalizing constant

Source code in empiricaldist/empiricaldist.py

def update(self, likelihood, data):
    """Multiply each hypothesis by its likelihood, then renormalize.

    Modifies self in place.

    Args:
        likelihood: function that takes (data, hypo) and returns
                    likelihood of data under hypo, P(data|hypo)
        data: in whatever format likelihood understands

    Returns: normalizing constant
    """
    hypotheses = self.qs
    for hypo in hypotheses:
        self[hypo] *= likelihood(data, hypo)

    norm_const = self.normalize()
    return norm_const

`var()` ¶

Variance of a PMF.

Returns: float

Source code in empiricaldist/empiricaldist.py

def var(self):
    """Variance: probability-weighted squared deviation from the mean.

    Returns: float

    Raises:
        ValueError: propagated from `mean`
    """
    center = self.mean()
    squared_dev = (self.qs - center) ** 2
    return np.sum(squared_dev * self.ps)

Probability Mass Function (Pmf)¶

__rsub__(x, **kwargs) ¶

__rtruediv__(x, **kwargs) ¶

add(x, **kwargs) ¶

add_dist(x) ¶

choice(size=1, **kwargs) ¶

conditional(i, val, name=None) ¶

convolve_dist(dist, ufunc) ¶

copy(deep=True) ¶

div(x, **kwargs) ¶

div_dist(x) ¶

eq_dist(x) ¶

from_seq(seq, normalize=True, sort=True, ascending=True, dropna=True, na_position='last', **options) staticmethod ¶

ge_dist(x) ¶

gt_dist(x) ¶

le_dist(x) ¶

lt_dist(x) ¶

make_cdf(**kwargs) ¶

make_hazard(normalize=False, **kwargs) ¶

make_joint(other, **options) ¶

make_pmf(**kwargs) ¶

make_same(dist) ¶

make_surv(**kwargs) ¶

marginal(i, name=None) ¶

mean() ¶

mode(**kwargs) ¶

mul(x, **kwargs) ¶

mul_dist(x) ¶

ne_dist(x) ¶

normalize() ¶

pmf_outer(dist, ufunc) ¶

std() ¶

sub(x, **kwargs) ¶

sub_dist(x) ¶

update(likelihood, data) ¶

var() ¶

`__rsub__(x, **kwargs)` ¶

`__rtruediv__(x, **kwargs)` ¶

`add(x, **kwargs)` ¶

`add_dist(x)` ¶

`choice(size=1, **kwargs)` ¶

`conditional(i, val, name=None)` ¶

`convolve_dist(dist, ufunc)` ¶

`copy(deep=True)` ¶

`div(x, **kwargs)` ¶

`div_dist(x)` ¶

`eq_dist(x)` ¶

`from_seq(seq, normalize=True, sort=True, ascending=True, dropna=True, na_position='last', **options)` `staticmethod` ¶

`ge_dist(x)` ¶

`gt_dist(x)` ¶

`le_dist(x)` ¶

`lt_dist(x)` ¶

`make_cdf(**kwargs)` ¶

`make_hazard(normalize=False, **kwargs)` ¶

`make_joint(other, **options)` ¶

`make_pmf(**kwargs)` ¶

`make_same(dist)` ¶

`make_surv(**kwargs)` ¶

`marginal(i, name=None)` ¶

`mean()` ¶

`mode(**kwargs)` ¶

`mul(x, **kwargs)` ¶

`mul_dist(x)` ¶

`ne_dist(x)` ¶

`normalize()` ¶

`pmf_outer(dist, ufunc)` ¶

`std()` ¶

`sub(x, **kwargs)` ¶

`sub_dist(x)` ¶

`update(likelihood, data)` ¶

`var()` ¶