Cumulative Distribution Function (Cdf)¶

A Cdf object represents a mapping from a quantity to the probability that a value from the distribution does not exceed the quantity.

Cdf is a subclass of a Pandas Series, so it has all Series methods, although some are overridden to change their behavior.

Bases: Distribution

Represents a Cumulative Distribution Function (CDF).

Source code in empiricaldist/empiricaldist.py

class Cdf(Distribution):
    """Represents a Cumulative Distribution Function (CDF).

    The methods below read the quantities through `self.qs` / `self.index`
    and the cumulative probabilities through `self.ps` / the values of the
    object itself; those accessors are provided by the base class.
    """

    def copy(self, deep=True):
        """Make a copy.

        Args:
            deep: whether to make a deep copy (forwarded as the `copy`
                argument of the Cdf constructor)

        Returns: new Cdf
        """
        return Cdf(self, copy=deep)

    @staticmethod
    def from_seq(seq, normalize=True, sort=True, **options):
        """Make a CDF from a sequence of values.

        Args:
            seq: iterable
            normalize: whether to normalize the Cdf, default True
            sort: whether to sort the Cdf by values, default True
            options: passed to the pd.Series constructor

        Returns: CDF object
        """
        # if normalize==True, normalize AFTER making the Cdf
        # so the last element is exactly 1.0
        pmf = Pmf.from_seq(seq, normalize=False, sort=sort, **options)
        return pmf.make_cdf(normalize=normalize)

    def step(self, **options):
        """Plot the Cdf as a step function.

        Args:
            options: passed to pd.Series.plot; `drawstyle="steps-post"` is
                supplied through `underride` (presumably only when the
                caller did not set `drawstyle` -- confirm against underride)
        """
        underride(options, drawstyle="steps-post")
        self.plot(**options)

    def normalize(self):
        """Scale the probabilities so the last one is 1 (modifies self).

        Every probability is divided by the last cumulative probability.

        Returns: normalizing constant (the last probability before scaling)
        """
        total = self.ps[-1]
        self /= total
        return total

    @property
    def forward(self, **kwargs):
        """Make a function that computes the forward Cdf.

        This is a property, so plain attribute access never supplies
        `kwargs`; the defaults below are what is normally used.

        Args:
            kwargs: keyword arguments passed to interp1d

        Returns: interpolation function from qs to ps
        """
        # kind="previous" gives a step function; with bounds_error=False and
        # fill_value=(0, 1), lookups below the first quantity return 0 and
        # lookups above the last quantity return 1.
        underride(
            kwargs,
            kind="previous",
            copy=False,
            assume_sorted=True,
            bounds_error=False,
            fill_value=(0, 1),
        )

        interp = interp1d(self.qs, self.ps, **kwargs)
        return interp

    @property
    def inverse(self, **kwargs):
        """Make a function that computes the inverse Cdf.

        This is a property, so plain attribute access never supplies
        `kwargs`; the defaults below are what is normally used.

        Args:
            kwargs: keyword arguments passed to interp1d

        Returns: interpolation function from ps to qs
        """
        # kind="next" maps a probability to the next tabulated quantity.
        # Probabilities below the first ps map to the first quantity;
        # probabilities above the last ps map to NaN.
        underride(
            kwargs,
            kind="next",
            copy=False,
            assume_sorted=True,
            bounds_error=False,
            fill_value=(self.qs[0], np.nan),
        )

        interp = interp1d(self.ps, self.qs, **kwargs)
        return interp

    # calling a Cdf like a function does forward lookup
    __call__ = forward

    # quantile is the same as an inverse lookup
    quantile = inverse

    def median(self):
        """Median (50th percentile).

        Returns: the result of the inverse lookup at probability 0.5
        """
        return self.quantile(0.5)

    def make_pmf(self, **kwargs):
        """Make a Pmf from the Cdf.

        Args:
            normalize: Boolean, whether to normalize the Pmf
                (popped from kwargs, default False)
            kwargs: passed to the Pmf constructor

        Returns: Pmf
        """
        normalize = kwargs.pop("normalize", False)

        # Differences of consecutive cumulative probabilities; prepending 0
        # makes the first entry equal to the first cumulative probability.
        diff = np.diff(self, prepend=0)
        pmf = Pmf(diff, index=self.index.copy(), **kwargs)
        if normalize:
            pmf.normalize()
        return pmf

    def make_surv(self, **kwargs):
        """Make a Surv object from the Cdf.

        This Cdf is left unchanged.

        Args:
            normalize: Boolean, whether to normalize the returned Surv
                (popped from kwargs, default False)
            kwargs: passed to the Surv constructor

        Returns: Surv object
        """
        normalize = kwargs.pop("normalize", False)
        total = self.ps[-1]
        surv = Surv(total - self, **kwargs)
        surv.total = total
        if normalize:
            # Normalize the new Surv, not this Cdf: building a survival
            # function must not mutate the distribution it was made from.
            # NOTE(review): assumes Surv provides normalize() like Cdf and
            # Pmf do -- confirm.
            surv.normalize()
        return surv

    def make_hazard(self, **kwargs):
        """Make a Hazard from the Cdf.

        The hazard is computed as pmf / (pmf + surv), using the Pmf and Surv
        made from this Cdf.

        Args:
            kwargs: passed to the Hazard constructor

        Returns: Hazard
        """
        pmf = self.make_pmf()
        surv = self.make_surv()
        haz = Hazard(pmf / (pmf + surv), **kwargs)
        # Carry the Surv's total over, defaulting to 1.0 if it has none.
        haz.total = getattr(surv, "total", 1.0)
        return haz

    def make_same(self, dist):
        """Convert the given dist to Cdf.

        Args:
            dist: Distribution

        Returns: Cdf
        """
        return dist.make_cdf()

    def sample(self, n=1, **kwargs):
        """Sample with replacement using probabilities as weights.

        Uses the inverse CDF: draws `n` uniform random numbers with
        `np.random.random` and looks each one up with `self.inverse`.

        Args:
            n: number of values
            **kwargs: forwarded to the call of the inverse interpolation
                function (not to the interp1d constructor)

        Returns: NumPy array
        """
        ps = np.random.random(n)
        return self.inverse(ps, **kwargs)

    def max_dist(self, n):
        """Distribution of the maximum of `n` values from this distribution.

        Each cumulative probability is raised to the power `n`.

        Args:
            n: integer

        Returns: Cdf
        """
        ps = self**n
        return Cdf(ps, self.index.copy())

    def min_dist(self, n):
        """Distribution of the minimum of `n` values from this distribution.

        Each cumulative probability p becomes 1 - (1 - p) ** n.

        Args:
            n: integer

        Returns: Cdf
        """
        ps = 1 - (1 - self) ** n
        return Cdf(ps, self.index.copy())

`forward` `property` ¶

Make a function that computes the forward Cdf.

Parameters:	`kwargs` – keyword arguments passed to interp1d

Returns: interpolation function from qs to ps

`inverse` `property` ¶

Make a function that computes the inverse Cdf.

Parameters:	`kwargs` – keyword arguments passed to interp1d

Returns: interpolation function from ps to qs

`copy(deep=True)` ¶

Make a copy.

Parameters:	`deep` – whether to make a deep copy

Returns: new Cdf

Source code in empiricaldist/empiricaldist.py

def copy(self, deep=True):
    """Return a new Cdf built from this one.

    Args:
        deep: whether to make a deep copy; forwarded as the `copy`
            argument of the Cdf constructor

    Returns: new Cdf
    """
    duplicate = Cdf(self, copy=deep)
    return duplicate

`from_seq(seq, normalize=True, sort=True, **options)` `staticmethod` ¶

Make a CDF from a sequence of values.

Parameters:	`seq` – iterable; `normalize` – whether to normalize the Cdf, default True; `sort` – whether to sort the Cdf by values, default True; `options` – passed to the pd.Series constructor

Returns: CDF object

Source code in empiricaldist/empiricaldist.py

@staticmethod
def from_seq(seq, normalize=True, sort=True, **options):
    """Build a CDF from a sequence of values.

    An unnormalized Pmf is made first and then converted, so that any
    normalization happens on the Cdf itself and the last element is
    exactly 1.0.

    Args:
        seq: iterable
        normalize: whether to normalize the Cdf, default True
        sort: whether to sort the Cdf by values, default True
        options: passed to the pd.Series constructor

    Returns: CDF object
    """
    return Pmf.from_seq(
        seq,
        normalize=False,
        sort=sort,
        **options,
    ).make_cdf(normalize=normalize)

`make_hazard(**kwargs)` ¶

Make a Hazard from the Cdf.

Parameters:	`kwargs` – passed to the Hazard constructor

Returns: Hazard

Source code in empiricaldist/empiricaldist.py

def make_hazard(self, **kwargs):
    """Build a Hazard from the Cdf.

    The hazard values are pmf / (pmf + surv), where pmf and surv are made
    from this Cdf.

    Args:
        kwargs: passed to the Hazard constructor

    Returns: Hazard
    """
    mass = self.make_pmf()
    survival = self.make_surv()

    at_risk = mass + survival
    hazard = Hazard(mass / at_risk, **kwargs)

    # Carry the Surv's total over, defaulting to 1.0 if it has none.
    hazard.total = getattr(survival, "total", 1.0)
    return hazard

`make_pmf(**kwargs)` ¶

Make a Pmf from the Cdf.

Parameters:	`normalize` – Boolean, whether to normalize the Pmf; `kwargs` – passed to the Pmf constructor

Returns: Pmf

Source code in empiricaldist/empiricaldist.py

def make_pmf(self, **kwargs):
    """Build a Pmf from the Cdf.

    Args:
        normalize: Boolean, whether to normalize the Pmf
            (popped from kwargs, default False)
        kwargs: passed to the Pmf constructor

    Returns: Pmf
    """
    should_normalize = kwargs.pop("normalize", False)

    # Differences of consecutive cumulative probabilities; prepending 0
    # makes the first entry equal to the first cumulative probability.
    increments = np.diff(self, prepend=0)
    result = Pmf(increments, index=self.index.copy(), **kwargs)

    if should_normalize:
        result.normalize()
    return result

`make_same(dist)` ¶

Convert the given dist to Cdf.

Parameters:	`dist` – Distribution

Returns: Cdf

Source code in empiricaldist/empiricaldist.py

def make_same(self, dist):
    """Convert the given distribution to a Cdf.

    Delegates to the `make_cdf` method of `dist`.

    Args:
        dist: Distribution

    Returns: Cdf
    """
    converted = dist.make_cdf()
    return converted

`make_surv(**kwargs)` ¶

Make a Surv object from the Cdf.

Parameters:	`kwargs` – passed to the Surv constructor

Returns: Surv object

Source code in empiricaldist/empiricaldist.py

def make_surv(self, **kwargs):
    """Make a Surv object from the Cdf.

    This Cdf is left unchanged.

    Args:
        normalize: Boolean, whether to normalize the returned Surv
            (popped from kwargs, default False)
        kwargs: passed to the Surv constructor

    Returns: Surv object
    """
    normalize = kwargs.pop("normalize", False)
    total = self.ps[-1]
    surv = Surv(total - self, **kwargs)
    surv.total = total
    if normalize:
        # Normalize the new Surv, not this Cdf: building a survival
        # function must not mutate the distribution it was made from.
        # NOTE(review): assumes Surv provides normalize() like Cdf and
        # Pmf do -- confirm.
        surv.normalize()
    return surv

`max_dist(n)` ¶

Distribution of the maximum of n values from this distribution.

Parameters:	`n` – integer

Returns: Cdf

Source code in empiricaldist/empiricaldist.py

def max_dist(self, n):
    """Distribution of the maximum of `n` values from this distribution.

    Each cumulative probability is raised to the power `n`.

    Args:
        n: integer

    Returns: Cdf
    """
    return Cdf(self**n, self.index.copy())

`median()` ¶

Median (50th percentile).

Returns: float

Source code in empiricaldist/empiricaldist.py

def median(self):
    """Median (50th percentile).

    Returns: the result of `self.quantile` at probability 0.5
    """
    halfway = 0.5
    return self.quantile(halfway)

`min_dist(n)` ¶

Distribution of the minimum of n values from this distribution.

Parameters:	`n` – integer

Returns: Cdf

Source code in empiricaldist/empiricaldist.py

def min_dist(self, n):
    """Distribution of the minimum of `n` values from this distribution.

    Each cumulative probability p becomes 1 - (1 - p) ** n.

    Args:
        n: integer

    Returns: Cdf
    """
    complement = 1 - self
    return Cdf(1 - complement**n, self.index.copy())

`normalize()` ¶

Scale the cumulative probabilities so that the last one is 1 (modifies self).

Returns: normalizing constant

Source code in empiricaldist/empiricaldist.py

def normalize(self):
    """Scale the probabilities so the last one is 1 (modifies self).

    Every probability is divided by the last cumulative probability.

    Returns: normalizing constant (the last probability before scaling)
    """
    last_p = self.ps[-1]
    # Augmented assignment, so the division is applied to this object.
    self /= last_p
    return last_p

`sample(n=1, **kwargs)` ¶

Sample with replacement using probabilities as weights.

Uses the inverse CDF.

Parameters:	`n` – number of values; `**kwargs` – passed to interp1d

Returns: NumPy array

Source code in empiricaldist/empiricaldist.py

def sample(self, n=1, **kwargs):
    """Sample with replacement using probabilities as weights.

    Uses the inverse CDF: `n` uniform random numbers are drawn with
    `np.random.random` and each one is looked up with `self.inverse`.

    Args:
        n: number of values
        **kwargs: forwarded to the `self.inverse` call

    Returns: NumPy array
    """
    uniform_draws = np.random.random(n)
    drawn = self.inverse(uniform_draws, **kwargs)
    return drawn

`step(**options)` ¶

Plot the Cdf as a step function.

Parameters:	`options` – passed to pd.Series.plot

Source code in empiricaldist/empiricaldist.py

def step(self, **options):
    """Plot the Cdf as a step function.

    Calls `self.plot` with the given options after `underride` has been
    applied with `drawstyle="steps-post"`.

    Args:
        options: passed to pd.Series.plot
    """
    # NOTE(review): underride presumably fills in drawstyle only when the
    # caller did not provide one, updating `options` in place -- confirm.
    underride(options, drawstyle="steps-post")
    self.plot(**options)

Cumulative Distribution Function (Cdf)¶

forward property ¶

inverse property ¶

copy(deep=True) ¶

from_seq(seq, normalize=True, sort=True, **options) staticmethod ¶

make_hazard(**kwargs) ¶

make_pmf(**kwargs) ¶

make_same(dist) ¶

make_surv(**kwargs) ¶

max_dist(n) ¶

median() ¶

min_dist(n) ¶

normalize() ¶

sample(n=1, **kwargs) ¶

step(**options) ¶

`forward` `property` ¶

`inverse` `property` ¶

`copy(deep=True)` ¶

`from_seq(seq, normalize=True, sort=True, **options)` `staticmethod` ¶

`make_hazard(**kwargs)` ¶

`make_pmf(**kwargs)` ¶

`make_same(dist)` ¶

`make_surv(**kwargs)` ¶

`max_dist(n)` ¶

`median()` ¶

`min_dist(n)` ¶

`normalize()` ¶

`sample(n=1, **kwargs)` ¶

`step(**options)` ¶