Cumulative Distribution Function (Cdf)

A Cdf object represents a mapping from a quantity to the probability that a value from the distribution does not exceed the quantity.

Cdf is a subclass of a Pandas Series, so it has all Series methods, although some are overridden to change their behavior.

Bases: Distribution

Represents a Cumulative Distribution Function (CDF).

Source code in empiricaldist/empiricaldist.py
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
class Cdf(Distribution):
    """Represents a Cumulative Distribution Function (CDF).

    The quantities (`qs`) and cumulative probabilities (`ps`) used below
    are not defined in this class; they come from the base class.
    """

    def copy(self, deep=True):
        """Make a copy.

        Args:
            deep: whether to make a deep copy

        Returns: new Cdf
        """
        return Cdf(self, copy=deep)

    @staticmethod
    def from_seq(seq, normalize=True, sort=True, **options):
        """Make a CDF from a sequence of values.

        Args:
            seq: iterable
            normalize: whether to normalize the Cdf, default True
            sort: whether to sort the Cdf by values, default True
            options: passed to the pd.Series constructor

        Returns: CDF object
        """
        # if normalize==True, normalize AFTER making the Cdf
        # so the last element is exactly 1.0
        pmf = Pmf.from_seq(seq, normalize=False, sort=sort, **options)
        return pmf.make_cdf(normalize=normalize)

    def step(self, **options):
        """Plot the Cdf as a step function.

        Args:
            options: passed to pd.Series.plot
        """
        underride(options, drawstyle="steps-post")
        self.plot(**options)

    def normalize(self):
        """Scale the Cdf so its last cumulative probability is 1 (modifies self).

        Returns: normalizing constant (the last value before scaling)
        """
        total = self.ps[-1]
        # In-place division: updates this object rather than rebinding a copy.
        self /= total
        return total

    @property
    def forward(self, **kwargs):
        """Make a function that computes the forward Cdf.

        Because this is a property, attribute access never supplies
        keyword arguments: `kwargs` starts empty and only the options
        added through `underride` below reach interp1d.

        Args:
            kwargs: keyword arguments passed to interp1d

        Returns: interpolation function from qs to ps
        """

        underride(
            kwargs,
            kind="previous",
            copy=False,
            assume_sorted=True,
            bounds_error=False,
            fill_value=(0, 1),
        )

        interp = interp1d(self.qs, self.ps, **kwargs)
        return interp

    def _make_inverse(self, **kwargs):
        """Build the ps -> qs interpolator shared by `inverse` and `sample`.

        Args:
            kwargs: keyword arguments passed to interp1d; the options
                listed below are supplied through `underride`

        Returns: interpolation function from ps to qs
        """
        underride(
            kwargs,
            kind="next",
            copy=False,
            assume_sorted=True,
            bounds_error=False,
            fill_value=(self.qs[0], np.nan),
        )
        return interp1d(self.ps, self.qs, **kwargs)

    @property
    def inverse(self, **kwargs):
        """Make a function that computes the inverse Cdf.

        Because this is a property, attribute access never supplies
        keyword arguments; use `sample` to pass interp1d options.

        Args:
            kwargs: keyword arguments passed to interp1d

        Returns: interpolation function from ps to qs
        """
        return self._make_inverse(**kwargs)

    # calling a Cdf like a function does forward lookup
    __call__ = forward

    # quantile is the same as an inverse lookup
    quantile = inverse

    def median(self):
        """Median (50th percentile).

        Returns: the quantity at cumulative probability 0.5, as produced
            by the inverse interpolator
        """
        return self.quantile(0.5)

    def make_pmf(self, **kwargs):
        """Make a Pmf from the Cdf.

        Args:
            normalize: Boolean, whether to normalize the Pmf
            kwargs: passed to the Pmf constructor

        Returns: Pmf
        """
        normalize = kwargs.pop("normalize", False)

        # Successive differences of the cumulative values; prepending 0
        # keeps the first entry so the result has the same length.
        diff = np.diff(self, prepend=0)
        pmf = Pmf(diff, index=self.index.copy(), **kwargs)
        if normalize:
            pmf.normalize()
        return pmf

    def make_surv(self, **kwargs):
        """Make a Surv object from the Cdf.

        Does not modify this Cdf.

        Args:
            normalize: Boolean, whether to normalize the Surv
            kwargs: passed to the Surv constructor

        Returns: Surv object
        """
        normalize = kwargs.pop("normalize", False)
        total = self.ps[-1]
        surv = Surv(total - self, **kwargs)
        surv.total = total
        if normalize:
            # Normalize the new Surv, not this Cdf: the original code
            # called self.normalize(), which mutated the source and left
            # the result untouched.
            # NOTE(review): assumes Surv provides normalize(), as Pmf
            # and Cdf do -- confirm.
            surv.normalize()
        return surv

    def make_hazard(self, **kwargs):
        """Make a Hazard from the Cdf.

        Args:
            kwargs: passed to the Hazard constructor

        Returns: Hazard
        """
        pmf = self.make_pmf()
        surv = self.make_surv()
        haz = Hazard(pmf / (pmf + surv), **kwargs)
        # Carry the total over from the Surv, defaulting to 1.0 if unset.
        haz.total = getattr(surv, "total", 1.0)
        return haz

    def make_same(self, dist):
        """Convert the given dist to Cdf.

        Args:
            dist: Distribution

        Returns: Cdf
        """
        return dist.make_cdf()

    def sample(self, n=1, **kwargs):
        """Sample with replacement using probabilities as weights.

        Uses the inverse CDF.

        Args:
            n: number of values
            **kwargs: passed to interp1d

        Returns: NumPy array
        """
        ps = np.random.random(n)
        # Build the interpolator with the caller's options. Passing them
        # to the interpolator *call* would fail, since it only takes x.
        return self._make_inverse(**kwargs)(ps)

    def max_dist(self, n):
        """Distribution of the maximum of `n` values from this distribution.

        Args:
            n: integer

        Returns: Cdf
        """
        ps = self**n
        return Cdf(ps, self.index.copy())

    def min_dist(self, n):
        """Distribution of the minimum of `n` values from this distribution.

        Args:
            n: integer

        Returns: Cdf
        """
        ps = 1 - (1 - self) ** n
        return Cdf(ps, self.index.copy())

forward property

Make a function that computes the forward Cdf.

Parameters:
  • kwargs

    keyword arguments passed to interp1d

Returns: interpolation function from qs to ps

inverse property

Make a function that computes the inverse Cdf.

Parameters:
  • kwargs

    keyword arguments passed to interp1d

Returns: interpolation function from ps to qs

copy(deep=True)

Make a copy.

Parameters:
  • deep

    whether to make a deep copy

Returns: new Cdf

Source code in empiricaldist/empiricaldist.py
925
926
927
928
929
930
931
932
933
def copy(self, deep=True):
    """Return a new Cdf built from this one.

    Args:
        deep: forwarded as the `copy` argument of the Cdf constructor

    Returns: new Cdf
    """
    duplicate = Cdf(self, copy=deep)
    return duplicate

from_seq(seq, normalize=True, sort=True, **options) staticmethod

Make a CDF from a sequence of values.

Parameters:
  • seq

    iterable

  • normalize

    whether to normalize the Cdf, default True

  • sort

    whether to sort the Cdf by values, default True

  • options

    passed to the pd.Series constructor

Returns: CDF object

Source code in empiricaldist/empiricaldist.py
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
@staticmethod
def from_seq(seq, normalize=True, sort=True, **options):
    """Build a CDF from a sequence of values.

    Args:
        seq: iterable
        normalize: whether to normalize the Cdf, default True
        sort: whether to sort the Cdf by values, default True
        options: passed to the pd.Series constructor

    Returns: CDF object
    """
    # The intermediate Pmf is left unnormalized on purpose: normalizing
    # at the Cdf stage makes the last element exactly 1.0.
    counts = Pmf.from_seq(seq, normalize=False, sort=sort, **options)
    cdf = counts.make_cdf(normalize=normalize)
    return cdf

make_hazard(**kwargs)

Make a Hazard from the Cdf.

Parameters:
  • kwargs

    passed to the Hazard constructor

Returns: Hazard

Source code in empiricaldist/empiricaldist.py
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
def make_hazard(self, **kwargs):
    """Build a Hazard from this Cdf.

    The hazard values are pmf / (pmf + surv), computed from the Pmf and
    Surv derived from this Cdf.

    Args:
        kwargs: passed to the Hazard constructor

    Returns: Hazard
    """
    probs = self.make_pmf()
    survival = self.make_surv()

    denominator = probs + survival
    hazard = Hazard(probs / denominator, **kwargs)

    # Carry the total over from the Surv, defaulting to 1.0 if unset.
    hazard.total = getattr(survival, "total", 1.0)
    return hazard

make_pmf(**kwargs)

Make a Pmf from the Cdf.

Parameters:
  • normalize

    Boolean, whether to normalize the Pmf

  • kwargs

    passed to the Pmf constructor

Returns: Pmf

Source code in empiricaldist/empiricaldist.py
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
def make_pmf(self, **kwargs):
    """Build a Pmf from this Cdf.

    Args:
        normalize: Boolean, whether to normalize the Pmf
        kwargs: passed to the Pmf constructor

    Returns: Pmf
    """
    # `normalize` is consumed here so it is not forwarded to Pmf.
    want_normalized = kwargs.pop("normalize", False)

    # Successive differences of the cumulative values; prepending 0
    # keeps the first entry so the result has the same length.
    steps = np.diff(self, prepend=0)
    result = Pmf(steps, index=self.index.copy(), **kwargs)

    if want_normalized:
        result.normalize()
    return result

make_same(dist)

Convert the given dist to Cdf.

Parameters:
  • dist

    Distribution

Returns: Cdf

Source code in empiricaldist/empiricaldist.py
1073
1074
1075
1076
1077
1078
1079
1080
1081
def make_same(self, dist):
    """Convert another distribution to a Cdf.

    Args:
        dist: Distribution; its `make_cdf` method does the conversion

    Returns: Cdf
    """
    converted = dist.make_cdf()
    return converted

make_surv(**kwargs)

Make a Surv object from the Cdf.

Parameters:
  • kwargs

    passed to the Surv constructor

Returns: Surv object

Source code in empiricaldist/empiricaldist.py
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
def make_surv(self, **kwargs):
    """Make a Surv object from the Cdf.

    Does not modify this Cdf.

    Args:
        normalize: Boolean, whether to normalize the Surv
        kwargs: passed to the Surv constructor

    Returns: Surv object
    """
    normalize = kwargs.pop("normalize", False)
    total = self.ps[-1]
    surv = Surv(total - self, **kwargs)
    surv.total = total
    if normalize:
        # Normalize the new Surv, not this Cdf: the original code called
        # self.normalize(), which mutated the source and left the result
        # untouched.
        # NOTE(review): assumes Surv provides normalize() -- confirm.
        surv.normalize()
    return surv

max_dist(n)

Distribution of the maximum of n values from this distribution.

Parameters:
  • n

    integer

Returns: Cdf

Source code in empiricaldist/empiricaldist.py
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
def max_dist(self, n):
    """Distribution of the maximum of `n` values from this distribution.

    The new cumulative probabilities are this Cdf raised to the power `n`.

    Args:
        n: integer

    Returns: Cdf
    """
    raised = self**n
    index = self.index.copy()
    return Cdf(raised, index)

median()

Median (50th percentile).

Returns: float

Source code in empiricaldist/empiricaldist.py
1019
1020
1021
1022
1023
1024
def median(self):
    """Median (50th percentile), looked up with `quantile`.

    Returns: the result of `self.quantile(0.5)`
    """
    halfway = 0.5
    return self.quantile(halfway)

min_dist(n)

Distribution of the minimum of n values from this distribution.

Parameters:
  • n

    integer

Returns: Cdf

Source code in empiricaldist/empiricaldist.py
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
def min_dist(self, n):
    """Distribution of the minimum of `n` values from this distribution.

    The new cumulative probabilities are 1 - (1 - cdf) ** n.

    Args:
        n: integer

    Returns: Cdf
    """
    complement = (1 - self) ** n
    ps = 1 - complement
    index = self.index.copy()
    return Cdf(ps, index)

normalize()

Scale the cumulative probabilities so that the last one is 1 (modifies self).

Returns: normalizing constant

Source code in empiricaldist/empiricaldist.py
961
962
963
964
965
966
967
968
def normalize(self):
    """Scale the Cdf so its last cumulative probability is 1 (modifies self).

    Returns: normalizing constant (the last value before scaling)
    """
    last_p = self.ps[-1]
    # In-place division: updates the object itself, not a copy.
    self /= last_p
    return last_p

sample(n=1, **kwargs)

Sample with replacement using probabilities as weights.

Uses the inverse CDF.

Parameters:
  • n

    number of values

  • **kwargs

    passed to interp1d

Returns: NumPy array

Source code in empiricaldist/empiricaldist.py
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
def sample(self, n=1, **kwargs):
    """Sample with replacement using probabilities as weights.

    Uses the inverse CDF. With no keyword arguments the interpolator
    from `self.inverse` is used. Otherwise a new interpolator is built
    from `self.ps` and `self.qs` with the given options.

    Args:
        n: number of values
        **kwargs: passed to interp1d

    Returns: NumPy array
    """
    ps = np.random.random(n)
    if not kwargs:
        return self.inverse(ps)

    # The options must go to the interp1d constructor. The interpolator
    # call accepts only x, so forwarding them there raises TypeError.
    from scipy.interpolate import interp1d

    # Same settings the inverse lookup uses; caller options win.
    options = {
        "kind": "next",
        "copy": False,
        "assume_sorted": True,
        "bounds_error": False,
        "fill_value": (self.qs[0], np.nan),
        **kwargs,
    }
    return interp1d(self.ps, self.qs, **options)(ps)

step(**options)

Plot the Cdf as a step function.

Parameters:
  • options

    passed to pd.Series.plot

Source code in empiricaldist/empiricaldist.py
952
953
954
955
956
957
958
959
def step(self, **options):
    """Draw the Cdf as a step function.

    Requests drawstyle "steps-post" through `underride` before plotting.

    Args:
        options: passed to pd.Series.plot
    """
    underride(options, drawstyle="steps-post")
    draw = self.plot
    draw(**options)