"
],
"text/plain": [
"1 0.25\n",
"2 0.50\n",
"3 0.75\n",
"4 1.00\n",
"dtype: float64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d4_copy = d4.copy()\n",
"d4_copy"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d4.index is d4_copy.index"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d4.ps is d4_copy.ps"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Displaying CDFs\n",
"\n",
"For comments or questions about this section, see [this issue](https://github.com/AllenDowney/EmpyricalDistributions/issues/13).\n",
"\n",
"`Cdf` provides `_repr_html_`, which wraps the probabilities in a one-column `DataFrame` (column `probs`) and returns that frame's HTML, so a `Cdf` renders as a table when displayed in a notebook."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" def _repr_html_(self):\n",
" \"\"\"Returns an HTML representation of the series.\n",
"\n",
" Mostly used for Jupyter notebooks.\n",
" \"\"\"\n",
" df = pd.DataFrame(dict(probs=self))\n",
" return df._repr_html_()\n",
"\n"
]
}
],
"source": [
"psource(Cdf._repr_html_)"
]
},
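{
"cell_type": "markdown",
"metadata": {},
"source": [
"`Cdf` provides `plot` through the pandas plotting accessor (`PlotAccessor`, whose source is printed below); with the default `kind='line'` it plots the Cdf as a line."
]
},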
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"class PlotAccessor(PandasObject):\n",
" \"\"\"\n",
" Make plots of Series or DataFrame.\n",
"\n",
" Uses the backend specified by the\n",
" option ``plotting.backend``. By default, matplotlib is used.\n",
"\n",
" Parameters\n",
" ----------\n",
" data : Series or DataFrame\n",
" The object for which the method is called.\n",
" x : label or position, default None\n",
" Only used if data is a DataFrame.\n",
" y : label, position or list of label, positions, default None\n",
" Allows plotting of one column versus another. Only used if data is a\n",
" DataFrame.\n",
" kind : str\n",
" The kind of plot to produce:\n",
"\n",
" - 'line' : line plot (default)\n",
" - 'bar' : vertical bar plot\n",
" - 'barh' : horizontal bar plot\n",
" - 'hist' : histogram\n",
" - 'box' : boxplot\n",
" - 'kde' : Kernel Density Estimation plot\n",
" - 'density' : same as 'kde'\n",
" - 'area' : area plot\n",
" - 'pie' : pie plot\n",
" - 'scatter' : scatter plot (DataFrame only)\n",
" - 'hexbin' : hexbin plot (DataFrame only)\n",
" ax : matplotlib axes object, default None\n",
" An axes of the current figure.\n",
" subplots : bool or sequence of iterables, default False\n",
" Whether to group columns into subplots:\n",
"\n",
" - ``False`` : No subplots will be used\n",
" - ``True`` : Make separate subplots for each column.\n",
" - sequence of iterables of column labels: Create a subplot for each\n",
" group of columns. For example `[('a', 'c'), ('b', 'd')]` will\n",
" create 2 subplots: one with columns 'a' and 'c', and one\n",
" with columns 'b' and 'd'. Remaining columns that aren't specified\n",
" will be plotted in additional subplots (one per column).\n",
"\n",
" .. versionadded:: 1.5.0\n",
"\n",
" sharex : bool, default True if ax is None else False\n",
" In case ``subplots=True``, share x axis and set some x axis labels\n",
" to invisible; defaults to True if ax is None otherwise False if\n",
" an ax is passed in; Be aware, that passing in both an ax and\n",
" ``sharex=True`` will alter all x axis labels for all axis in a figure.\n",
" sharey : bool, default False\n",
" In case ``subplots=True``, share y axis and set some y axis labels to invisible.\n",
" layout : tuple, optional\n",
" (rows, columns) for the layout of subplots.\n",
" figsize : a tuple (width, height) in inches\n",
" Size of a figure object.\n",
" use_index : bool, default True\n",
" Use index as ticks for x axis.\n",
" title : str or list\n",
" Title to use for the plot. If a string is passed, print the string\n",
" at the top of the figure. If a list is passed and `subplots` is\n",
" True, print each item in the list above the corresponding subplot.\n",
" grid : bool, default None (matlab style default)\n",
" Axis grid lines.\n",
" legend : bool or {'reverse'}\n",
" Place legend on axis subplots.\n",
" style : list or dict\n",
" The matplotlib line style per column.\n",
" logx : bool or 'sym', default False\n",
" Use log scaling or symlog scaling on x axis.\n",
"\n",
" logy : bool or 'sym' default False\n",
" Use log scaling or symlog scaling on y axis.\n",
"\n",
" loglog : bool or 'sym', default False\n",
" Use log scaling or symlog scaling on both x and y axes.\n",
"\n",
" xticks : sequence\n",
" Values to use for the xticks.\n",
" yticks : sequence\n",
" Values to use for the yticks.\n",
" xlim : 2-tuple/list\n",
" Set the x limits of the current axes.\n",
" ylim : 2-tuple/list\n",
" Set the y limits of the current axes.\n",
" xlabel : label, optional\n",
" Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the\n",
" x-column name for planar plots.\n",
"\n",
" .. versionchanged:: 1.2.0\n",
"\n",
" Now applicable to planar plots (`scatter`, `hexbin`).\n",
"\n",
" .. versionchanged:: 2.0.0\n",
"\n",
" Now applicable to histograms.\n",
"\n",
" ylabel : label, optional\n",
" Name to use for the ylabel on y-axis. Default will show no ylabel, or the\n",
" y-column name for planar plots.\n",
"\n",
" .. versionchanged:: 1.2.0\n",
"\n",
" Now applicable to planar plots (`scatter`, `hexbin`).\n",
"\n",
" .. versionchanged:: 2.0.0\n",
"\n",
" Now applicable to histograms.\n",
"\n",
" rot : float, default None\n",
" Rotation for ticks (xticks for vertical, yticks for horizontal\n",
" plots).\n",
" fontsize : float, default None\n",
" Font size for xticks and yticks.\n",
" colormap : str or matplotlib colormap object, default None\n",
" Colormap to select colors from. If string, load colormap with that\n",
" name from matplotlib.\n",
" colorbar : bool, optional\n",
" If True, plot colorbar (only relevant for 'scatter' and 'hexbin'\n",
" plots).\n",
" position : float\n",
" Specify relative alignments for bar plot layout.\n",
" From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5\n",
" (center).\n",
" table : bool, Series or DataFrame, default False\n",
" If True, draw a table using the data in the DataFrame and the data\n",
" will be transposed to meet matplotlib's default layout.\n",
" If a Series or DataFrame is passed, use passed data to draw a\n",
" table.\n",
" yerr : DataFrame, Series, array-like, dict and str\n",
" See :ref:`Plotting with Error Bars ` for\n",
" detail.\n",
" xerr : DataFrame, Series, array-like, dict and str\n",
" Equivalent to yerr.\n",
" stacked : bool, default False in line and bar plots, and True in area plot\n",
" If True, create stacked plot.\n",
" secondary_y : bool or sequence, default False\n",
" Whether to plot on the secondary y-axis if a list/tuple, which\n",
" columns to plot on secondary y-axis.\n",
" mark_right : bool, default True\n",
" When using a secondary_y axis, automatically mark the column\n",
" labels with \"(right)\" in the legend.\n",
" include_bool : bool, default is False\n",
" If True, boolean values can be plotted.\n",
" backend : str, default None\n",
" Backend to use instead of the backend specified in the option\n",
" ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to\n",
" specify the ``plotting.backend`` for the whole session, set\n",
" ``pd.options.plotting.backend``.\n",
" **kwargs\n",
" Options to pass to matplotlib plotting method.\n",
"\n",
" Returns\n",
" -------\n",
" :class:`matplotlib.axes.Axes` or numpy.ndarray of them\n",
" If the backend is not the default matplotlib one, the return value\n",
" will be the object returned by the backend.\n",
"\n",
" Notes\n",
" -----\n",
" - See matplotlib documentation online for more on this subject\n",
" - If `kind` = 'bar' or 'barh', you can specify relative alignments\n",
" for bar plot layout by `position` keyword.\n",
" From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5\n",
" (center)\n",
"\n",
" Examples\n",
" --------\n",
" For Series:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ser = pd.Series([1, 2, 3, 3])\n",
" >>> plot = ser.plot(kind='hist', title=\"My plot\")\n",
"\n",
" For DataFrame:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3],\n",
" ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]},\n",
" ... index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])\n",
" >>> plot = df.plot(title=\"DataFrame Plot\")\n",
"\n",
" For SeriesGroupBy:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> lst = [-1, -2, -3, 1, 2, 3]\n",
" >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)\n",
" >>> plot = ser.groupby(lambda x: x > 0).plot(title=\"SeriesGroupBy Plot\")\n",
"\n",
" For DataFrameGroupBy:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({\"col1\" : [1, 2, 3, 4],\n",
" ... \"col2\" : [\"A\", \"B\", \"A\", \"B\"]})\n",
" >>> plot = df.groupby(\"col2\").plot(kind=\"bar\", title=\"DataFrameGroupBy Plot\")\n",
" \"\"\"\n",
"\n",
" _common_kinds = (\"line\", \"bar\", \"barh\", \"kde\", \"density\", \"area\", \"hist\", \"box\")\n",
" _series_kinds = (\"pie\",)\n",
" _dataframe_kinds = (\"scatter\", \"hexbin\")\n",
" _kind_aliases = {\"density\": \"kde\"}\n",
" _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds\n",
"\n",
" def __init__(self, data) -> None:\n",
" self._parent = data\n",
"\n",
" @staticmethod\n",
" def _get_call_args(backend_name: str, data, args, kwargs):\n",
" \"\"\"\n",
" This function makes calls to this accessor `__call__` method compatible\n",
" with the previous `SeriesPlotMethods.__call__` and\n",
" `DataFramePlotMethods.__call__`. Those had slightly different\n",
" signatures, since `DataFramePlotMethods` accepted `x` and `y`\n",
" parameters.\n",
" \"\"\"\n",
" if isinstance(data, ABCSeries):\n",
" arg_def = [\n",
" (\"kind\", \"line\"),\n",
" (\"ax\", None),\n",
" (\"figsize\", None),\n",
" (\"use_index\", True),\n",
" (\"title\", None),\n",
" (\"grid\", None),\n",
" (\"legend\", False),\n",
" (\"style\", None),\n",
" (\"logx\", False),\n",
" (\"logy\", False),\n",
" (\"loglog\", False),\n",
" (\"xticks\", None),\n",
" (\"yticks\", None),\n",
" (\"xlim\", None),\n",
" (\"ylim\", None),\n",
" (\"rot\", None),\n",
" (\"fontsize\", None),\n",
" (\"colormap\", None),\n",
" (\"table\", False),\n",
" (\"yerr\", None),\n",
" (\"xerr\", None),\n",
" (\"label\", None),\n",
" (\"secondary_y\", False),\n",
" (\"xlabel\", None),\n",
" (\"ylabel\", None),\n",
" ]\n",
" elif isinstance(data, ABCDataFrame):\n",
" arg_def = [\n",
" (\"x\", None),\n",
" (\"y\", None),\n",
" (\"kind\", \"line\"),\n",
" (\"ax\", None),\n",
" (\"subplots\", False),\n",
" (\"sharex\", None),\n",
" (\"sharey\", False),\n",
" (\"layout\", None),\n",
" (\"figsize\", None),\n",
" (\"use_index\", True),\n",
" (\"title\", None),\n",
" (\"grid\", None),\n",
" (\"legend\", True),\n",
" (\"style\", None),\n",
" (\"logx\", False),\n",
" (\"logy\", False),\n",
" (\"loglog\", False),\n",
" (\"xticks\", None),\n",
" (\"yticks\", None),\n",
" (\"xlim\", None),\n",
" (\"ylim\", None),\n",
" (\"rot\", None),\n",
" (\"fontsize\", None),\n",
" (\"colormap\", None),\n",
" (\"table\", False),\n",
" (\"yerr\", None),\n",
" (\"xerr\", None),\n",
" (\"secondary_y\", False),\n",
" (\"xlabel\", None),\n",
" (\"ylabel\", None),\n",
" ]\n",
" else:\n",
" raise TypeError(\n",
" f\"Called plot accessor for type {type(data).__name__}, \"\n",
" \"expected Series or DataFrame\"\n",
" )\n",
"\n",
" if args and isinstance(data, ABCSeries):\n",
" positional_args = str(args)[1:-1]\n",
" keyword_args = \", \".join(\n",
" [f\"{name}={repr(value)}\" for (name, _), value in zip(arg_def, args)]\n",
" )\n",
" msg = (\n",
" \"`Series.plot()` should not be called with positional \"\n",
" \"arguments, only keyword arguments. The order of \"\n",
" \"positional arguments will change in the future. \"\n",
" f\"Use `Series.plot({keyword_args})` instead of \"\n",
" f\"`Series.plot({positional_args})`.\"\n",
" )\n",
" raise TypeError(msg)\n",
"\n",
" pos_args = {name: value for (name, _), value in zip(arg_def, args)}\n",
" if backend_name == \"pandas.plotting._matplotlib\":\n",
" kwargs = dict(arg_def, **pos_args, **kwargs)\n",
" else:\n",
" kwargs = dict(pos_args, **kwargs)\n",
"\n",
" x = kwargs.pop(\"x\", None)\n",
" y = kwargs.pop(\"y\", None)\n",
" kind = kwargs.pop(\"kind\", \"line\")\n",
" return x, y, kind, kwargs\n",
"\n",
" def __call__(self, *args, **kwargs):\n",
" plot_backend = _get_plot_backend(kwargs.pop(\"backend\", None))\n",
"\n",
" x, y, kind, kwargs = self._get_call_args(\n",
" plot_backend.__name__, self._parent, args, kwargs\n",
" )\n",
"\n",
" kind = self._kind_aliases.get(kind, kind)\n",
"\n",
" # when using another backend, get out of the way\n",
" if plot_backend.__name__ != \"pandas.plotting._matplotlib\":\n",
" return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)\n",
"\n",
" if kind not in self._all_kinds:\n",
" raise ValueError(f\"{kind} is not a valid plot kind\")\n",
"\n",
" # The original data structured can be transformed before passed to the\n",
" # backend. For example, for DataFrame is common to set the index as the\n",
" # `x` parameter, and return a Series with the parameter `y` as values.\n",
" data = self._parent.copy()\n",
"\n",
" if isinstance(data, ABCSeries):\n",
" kwargs[\"reuse_plot\"] = True\n",
"\n",
" if kind in self._dataframe_kinds:\n",
" if isinstance(data, ABCDataFrame):\n",
" return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)\n",
" else:\n",
" raise ValueError(f\"plot kind {kind} can only be used for data frames\")\n",
" elif kind in self._series_kinds:\n",
" if isinstance(data, ABCDataFrame):\n",
" if y is None and kwargs.get(\"subplots\") is False:\n",
" raise ValueError(\n",
" f\"{kind} requires either y column or 'subplots=True'\"\n",
" )\n",
" if y is not None:\n",
" if is_integer(y) and not data.columns._holds_integer():\n",
" y = data.columns[y]\n",
" # converted to series actually. copy to not modify\n",
" data = data[y].copy()\n",
" data.index.name = y\n",
" elif isinstance(data, ABCDataFrame):\n",
" data_cols = data.columns\n",
" if x is not None:\n",
" if is_integer(x) and not data.columns._holds_integer():\n",
" x = data_cols[x]\n",
" elif not isinstance(data[x], ABCSeries):\n",
" raise ValueError(\"x must be a label or position\")\n",
" data = data.set_index(x)\n",
" if y is not None:\n",
" # check if we have y as int or list of ints\n",
" int_ylist = is_list_like(y) and all(is_integer(c) for c in y)\n",
" int_y_arg = is_integer(y) or int_ylist\n",
" if int_y_arg and not data.columns._holds_integer():\n",
" y = data_cols[y]\n",
"\n",
" label_kw = kwargs[\"label\"] if \"label\" in kwargs else False\n",
" for kw in [\"xerr\", \"yerr\"]:\n",
" if kw in kwargs and (\n",
" isinstance(kwargs[kw], str) or is_integer(kwargs[kw])\n",
" ):\n",
" try:\n",
" kwargs[kw] = data[kwargs[kw]]\n",
" except (IndexError, KeyError, TypeError):\n",
" pass\n",
"\n",
" # don't overwrite\n",
" data = data[y].copy()\n",
"\n",
" if isinstance(data, ABCSeries):\n",
" label_name = label_kw or y\n",
" data.name = label_name\n",
" else:\n",
" match = is_list_like(label_kw) and len(label_kw) == len(y)\n",
" if label_kw and not match:\n",
" raise ValueError(\n",
" \"label should be list-like and same length as y\"\n",
" )\n",
" label_name = label_kw or data.columns\n",
" data.columns = label_name\n",
"\n",
" return plot_backend.plot(data, kind=kind, **kwargs)\n",
"\n",
" __call__.__doc__ = __doc__\n",
"\n",
" @Appender(\n",
" \"\"\"\n",
" See Also\n",
" --------\n",
" matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.\n",
"\n",
" Examples\n",
" --------\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> s = pd.Series([1, 3, 2])\n",
" >>> s.plot.line() # doctest: +SKIP\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" The following example shows the populations for some animals\n",
" over the years.\n",
"\n",
" >>> df = pd.DataFrame({\n",
" ... 'pig': [20, 18, 489, 675, 1776],\n",
" ... 'horse': [4, 25, 281, 600, 1900]\n",
" ... }, index=[1990, 1997, 2003, 2009, 2014])\n",
" >>> lines = df.plot.line()\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" An example with subplots, so an array of axes is returned.\n",
"\n",
" >>> axes = df.plot.line(subplots=True)\n",
" >>> type(axes)\n",
" \n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" Let's repeat the same example, but specifying colors for\n",
" each column (in this case, for each animal).\n",
"\n",
" >>> axes = df.plot.line(\n",
" ... subplots=True, color={\"pig\": \"pink\", \"horse\": \"#742802\"}\n",
" ... )\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" The following example shows the relationship between both\n",
" populations.\n",
"\n",
" >>> lines = df.plot.line(x='pig', y='horse')\n",
" \"\"\"\n",
" )\n",
" @Substitution(kind=\"line\")\n",
" @Appender(_bar_or_line_doc)\n",
" def line(\n",
" self, x: Hashable | None = None, y: Hashable | None = None, **kwargs\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Plot Series or DataFrame as lines.\n",
"\n",
" This function is useful to plot lines using DataFrame's values\n",
" as coordinates.\n",
" \"\"\"\n",
" return self(kind=\"line\", x=x, y=y, **kwargs)\n",
"\n",
" @Appender(\n",
" \"\"\"\n",
" See Also\n",
" --------\n",
" DataFrame.plot.barh : Horizontal bar plot.\n",
" DataFrame.plot : Make plots of a DataFrame.\n",
" matplotlib.pyplot.bar : Make a bar plot with matplotlib.\n",
"\n",
" Examples\n",
" --------\n",
" Basic plot.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})\n",
" >>> ax = df.plot.bar(x='lab', y='val', rot=0)\n",
"\n",
" Plot a whole dataframe to a bar plot. Each column is assigned a\n",
" distinct color, and each row is nested in a group along the\n",
" horizontal axis.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]\n",
" >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]\n",
" >>> index = ['snail', 'pig', 'elephant',\n",
" ... 'rabbit', 'giraffe', 'coyote', 'horse']\n",
" >>> df = pd.DataFrame({'speed': speed,\n",
" ... 'lifespan': lifespan}, index=index)\n",
" >>> ax = df.plot.bar(rot=0)\n",
"\n",
" Plot stacked bar charts for the DataFrame\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.bar(stacked=True)\n",
"\n",
" Instead of nesting, the figure can be split by column with\n",
" ``subplots=True``. In this case, a :class:`numpy.ndarray` of\n",
" :class:`matplotlib.axes.Axes` are returned.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> axes = df.plot.bar(rot=0, subplots=True)\n",
" >>> axes[1].legend(loc=2) # doctest: +SKIP\n",
"\n",
" If you don't like the default colours, you can specify how you'd\n",
" like each column to be colored.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> axes = df.plot.bar(\n",
" ... rot=0, subplots=True, color={\"speed\": \"red\", \"lifespan\": \"green\"}\n",
" ... )\n",
" >>> axes[1].legend(loc=2) # doctest: +SKIP\n",
"\n",
" Plot a single column.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.bar(y='speed', rot=0)\n",
"\n",
" Plot only selected categories for the DataFrame.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.bar(x='lifespan', rot=0)\n",
" \"\"\"\n",
" )\n",
" @Substitution(kind=\"bar\")\n",
" @Appender(_bar_or_line_doc)\n",
" def bar( # pylint: disable=disallowed-name\n",
" self, x: Hashable | None = None, y: Hashable | None = None, **kwargs\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Vertical bar plot.\n",
"\n",
" A bar plot is a plot that presents categorical data with\n",
" rectangular bars with lengths proportional to the values that they\n",
" represent. A bar plot shows comparisons among discrete categories. One\n",
" axis of the plot shows the specific categories being compared, and the\n",
" other axis represents a measured value.\n",
" \"\"\"\n",
" return self(kind=\"bar\", x=x, y=y, **kwargs)\n",
"\n",
" @Appender(\n",
" \"\"\"\n",
" See Also\n",
" --------\n",
" DataFrame.plot.bar: Vertical bar plot.\n",
" DataFrame.plot : Make plots of DataFrame using matplotlib.\n",
" matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.\n",
"\n",
" Examples\n",
" --------\n",
" Basic example\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})\n",
" >>> ax = df.plot.barh(x='lab', y='val')\n",
"\n",
" Plot a whole DataFrame to a horizontal bar plot\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]\n",
" >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]\n",
" >>> index = ['snail', 'pig', 'elephant',\n",
" ... 'rabbit', 'giraffe', 'coyote', 'horse']\n",
" >>> df = pd.DataFrame({'speed': speed,\n",
" ... 'lifespan': lifespan}, index=index)\n",
" >>> ax = df.plot.barh()\n",
"\n",
" Plot stacked barh charts for the DataFrame\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.barh(stacked=True)\n",
"\n",
" We can specify colors for each column\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.barh(color={\"speed\": \"red\", \"lifespan\": \"green\"})\n",
"\n",
" Plot a column of the DataFrame to a horizontal bar plot\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]\n",
" >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]\n",
" >>> index = ['snail', 'pig', 'elephant',\n",
" ... 'rabbit', 'giraffe', 'coyote', 'horse']\n",
" >>> df = pd.DataFrame({'speed': speed,\n",
" ... 'lifespan': lifespan}, index=index)\n",
" >>> ax = df.plot.barh(y='speed')\n",
"\n",
" Plot DataFrame versus the desired column\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]\n",
" >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]\n",
" >>> index = ['snail', 'pig', 'elephant',\n",
" ... 'rabbit', 'giraffe', 'coyote', 'horse']\n",
" >>> df = pd.DataFrame({'speed': speed,\n",
" ... 'lifespan': lifespan}, index=index)\n",
" >>> ax = df.plot.barh(x='lifespan')\n",
" \"\"\"\n",
" )\n",
" @Substitution(kind=\"bar\")\n",
" @Appender(_bar_or_line_doc)\n",
" def barh(\n",
" self, x: Hashable | None = None, y: Hashable | None = None, **kwargs\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Make a horizontal bar plot.\n",
"\n",
" A horizontal bar plot is a plot that presents quantitative data with\n",
" rectangular bars with lengths proportional to the values that they\n",
" represent. A bar plot shows comparisons among discrete categories. One\n",
" axis of the plot shows the specific categories being compared, and the\n",
" other axis represents a measured value.\n",
" \"\"\"\n",
" return self(kind=\"barh\", x=x, y=y, **kwargs)\n",
"\n",
" def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor:\n",
" r\"\"\"\n",
" Make a box plot of the DataFrame columns.\n",
"\n",
" A box plot is a method for graphically depicting groups of numerical\n",
" data through their quartiles.\n",
" The box extends from the Q1 to Q3 quartile values of the data,\n",
" with a line at the median (Q2). The whiskers extend from the edges\n",
" of box to show the range of the data. The position of the whiskers\n",
" is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the\n",
" box. Outlier points are those past the end of the whiskers.\n",
"\n",
" For further details see Wikipedia's\n",
" entry for `boxplot `__.\n",
"\n",
" A consideration when using this chart is that the box and the whiskers\n",
" can overlap, which is very common when plotting small sets of data.\n",
"\n",
" Parameters\n",
" ----------\n",
" by : str or sequence\n",
" Column in the DataFrame to group by.\n",
"\n",
" .. versionchanged:: 1.4.0\n",
"\n",
" Previously, `by` is silently ignore and makes no groupings\n",
"\n",
" **kwargs\n",
" Additional keywords are documented in\n",
" :meth:`DataFrame.plot`.\n",
"\n",
" Returns\n",
" -------\n",
" :class:`matplotlib.axes.Axes` or numpy.ndarray of them\n",
"\n",
" See Also\n",
" --------\n",
" DataFrame.boxplot: Another method to draw a box plot.\n",
" Series.plot.box: Draw a box plot from a Series object.\n",
" matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.\n",
"\n",
" Examples\n",
" --------\n",
" Draw a box plot from a DataFrame with four columns of randomly\n",
" generated data.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> data = np.random.randn(25, 4)\n",
" >>> df = pd.DataFrame(data, columns=list('ABCD'))\n",
" >>> ax = df.plot.box()\n",
"\n",
" You can also generate groupings if you specify the `by` parameter (which\n",
" can take a column name, or a list or tuple of column names):\n",
"\n",
" .. versionchanged:: 1.4.0\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]\n",
" >>> df = pd.DataFrame({\"gender\": list(\"MMMMMMMMFFFFFF\"), \"age\": age_list})\n",
" >>> ax = df.plot.box(column=\"age\", by=\"gender\", figsize=(10, 8))\n",
" \"\"\"\n",
" return self(kind=\"box\", by=by, **kwargs)\n",
"\n",
" def hist(\n",
" self, by: IndexLabel | None = None, bins: int = 10, **kwargs\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Draw one histogram of the DataFrame's columns.\n",
"\n",
" A histogram is a representation of the distribution of data.\n",
" This function groups the values of all given Series in the DataFrame\n",
" into bins and draws all bins in one :class:`matplotlib.axes.Axes`.\n",
" This is useful when the DataFrame's Series are in a similar scale.\n",
"\n",
" Parameters\n",
" ----------\n",
" by : str or sequence, optional\n",
" Column in the DataFrame to group by.\n",
"\n",
" .. versionchanged:: 1.4.0\n",
"\n",
" Previously, `by` is silently ignore and makes no groupings\n",
"\n",
" bins : int, default 10\n",
" Number of histogram bins to be used.\n",
" **kwargs\n",
" Additional keyword arguments are documented in\n",
" :meth:`DataFrame.plot`.\n",
"\n",
" Returns\n",
" -------\n",
" class:`matplotlib.AxesSubplot`\n",
" Return a histogram plot.\n",
"\n",
" See Also\n",
" --------\n",
" DataFrame.hist : Draw histograms per DataFrame's Series.\n",
" Series.hist : Draw a histogram with Series' data.\n",
"\n",
" Examples\n",
" --------\n",
" When we roll a die 6000 times, we expect to get each value around 1000\n",
" times. But when we roll two dice and sum the result, the distribution\n",
" is going to be quite different. A histogram illustrates those\n",
" distributions.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame(\n",
" ... np.random.randint(1, 7, 6000),\n",
" ... columns = ['one'])\n",
" >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)\n",
" >>> ax = df.plot.hist(bins=12, alpha=0.5)\n",
"\n",
" A grouped histogram can be generated by providing the parameter `by` (which\n",
" can be a column name, or a list of column names):\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]\n",
" >>> df = pd.DataFrame({\"gender\": list(\"MMMMMMMMFFFFFF\"), \"age\": age_list})\n",
" >>> ax = df.plot.hist(column=[\"age\"], by=\"gender\", figsize=(10, 8))\n",
" \"\"\"\n",
" return self(kind=\"hist\", by=by, bins=bins, **kwargs)\n",
"\n",
" def kde(\n",
" self,\n",
" bw_method: Literal[\"scott\", \"silverman\"] | float | Callable | None = None,\n",
" ind: np.ndarray | int | None = None,\n",
" **kwargs,\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Generate Kernel Density Estimate plot using Gaussian kernels.\n",
"\n",
" In statistics, `kernel density estimation`_ (KDE) is a non-parametric\n",
" way to estimate the probability density function (PDF) of a random\n",
" variable. This function uses Gaussian kernels and includes automatic\n",
" bandwidth determination.\n",
"\n",
" .. _kernel density estimation:\n",
" https://en.wikipedia.org/wiki/Kernel_density_estimation\n",
"\n",
" Parameters\n",
" ----------\n",
" bw_method : str, scalar or callable, optional\n",
" The method used to calculate the estimator bandwidth. This can be\n",
" 'scott', 'silverman', a scalar constant or a callable.\n",
" If None (default), 'scott' is used.\n",
" See :class:`scipy.stats.gaussian_kde` for more information.\n",
" ind : NumPy array or int, optional\n",
" Evaluation points for the estimated PDF. If None (default),\n",
" 1000 equally spaced points are used. If `ind` is a NumPy array, the\n",
" KDE is evaluated at the points passed. If `ind` is an integer,\n",
" `ind` number of equally spaced points are used.\n",
" **kwargs\n",
" Additional keyword arguments are documented in\n",
" :meth:`DataFrame.plot`.\n",
"\n",
" Returns\n",
" -------\n",
" matplotlib.axes.Axes or numpy.ndarray of them\n",
"\n",
" See Also\n",
" --------\n",
" scipy.stats.gaussian_kde : Representation of a kernel-density\n",
" estimate using Gaussian kernels. This is the function used\n",
" internally to estimate the PDF.\n",
"\n",
" Examples\n",
" --------\n",
" Given a Series of points randomly sampled from an unknown\n",
" distribution, estimate its PDF using KDE with automatic\n",
" bandwidth determination and plot the results, evaluating them at\n",
" 1000 equally spaced points (default):\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])\n",
" >>> ax = s.plot.kde()\n",
"\n",
" A scalar bandwidth can be specified. Using a small bandwidth value can\n",
" lead to over-fitting, while using a large bandwidth value may result\n",
" in under-fitting:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = s.plot.kde(bw_method=0.3)\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = s.plot.kde(bw_method=3)\n",
"\n",
" Finally, the `ind` parameter determines the evaluation points for the\n",
" plot of the estimated PDF:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])\n",
"\n",
" For DataFrame, it works in the same way:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({\n",
" ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],\n",
" ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],\n",
" ... })\n",
" >>> ax = df.plot.kde()\n",
"\n",
" A scalar bandwidth can be specified. Using a small bandwidth value can\n",
" lead to over-fitting, while using a large bandwidth value may result\n",
" in under-fitting:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.kde(bw_method=0.3)\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.kde(bw_method=3)\n",
"\n",
" Finally, the `ind` parameter determines the evaluation points for the\n",
" plot of the estimated PDF:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])\n",
" \"\"\"\n",
" return self(kind=\"kde\", bw_method=bw_method, ind=ind, **kwargs)\n",
"\n",
" density = kde\n",
"\n",
" def area(\n",
" self,\n",
" x: Hashable | None = None,\n",
" y: Hashable | None = None,\n",
" stacked: bool = True,\n",
" **kwargs,\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Draw a stacked area plot.\n",
"\n",
" An area plot displays quantitative data visually.\n",
" This function wraps the matplotlib area function.\n",
"\n",
" Parameters\n",
" ----------\n",
" x : label or position, optional\n",
" Coordinates for the X axis. By default uses the index.\n",
" y : label or position, optional\n",
" Column to plot. By default uses all columns.\n",
" stacked : bool, default True\n",
" Area plots are stacked by default. Set to False to create a\n",
" unstacked plot.\n",
" **kwargs\n",
" Additional keyword arguments are documented in\n",
" :meth:`DataFrame.plot`.\n",
"\n",
" Returns\n",
" -------\n",
" matplotlib.axes.Axes or numpy.ndarray\n",
" Area plot, or array of area plots if subplots is True.\n",
"\n",
" See Also\n",
" --------\n",
" DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.\n",
"\n",
" Examples\n",
" --------\n",
" Draw an area plot based on basic business metrics:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({\n",
" ... 'sales': [3, 2, 3, 9, 10, 6],\n",
" ... 'signups': [5, 5, 6, 12, 14, 13],\n",
" ... 'visits': [20, 42, 28, 62, 81, 50],\n",
" ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',\n",
" ... freq='M'))\n",
" >>> ax = df.plot.area()\n",
"\n",
" Area plots are stacked by default. To produce an unstacked plot,\n",
" pass ``stacked=False``:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.area(stacked=False)\n",
"\n",
" Draw an area plot for a single column:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax = df.plot.area(y='sales')\n",
"\n",
" Draw with a different `x`:\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({\n",
" ... 'sales': [3, 2, 3],\n",
" ... 'visits': [20, 42, 28],\n",
" ... 'day': [1, 2, 3],\n",
" ... })\n",
" >>> ax = df.plot.area(x='day')\n",
" \"\"\"\n",
" return self(kind=\"area\", x=x, y=y, stacked=stacked, **kwargs)\n",
"\n",
" def pie(self, **kwargs) -> PlotAccessor:\n",
" \"\"\"\n",
" Generate a pie plot.\n",
"\n",
" A pie plot is a proportional representation of the numerical data in a\n",
" column. This function wraps :meth:`matplotlib.pyplot.pie` for the\n",
" specified column. If no column reference is passed and\n",
" ``subplots=True`` a pie plot is drawn for each numerical column\n",
" independently.\n",
"\n",
" Parameters\n",
" ----------\n",
" y : int or label, optional\n",
" Label or position of the column to plot.\n",
" If not provided, ``subplots=True`` argument must be passed.\n",
" **kwargs\n",
" Keyword arguments to pass on to :meth:`DataFrame.plot`.\n",
"\n",
" Returns\n",
" -------\n",
" matplotlib.axes.Axes or np.ndarray of them\n",
" A NumPy array is returned when `subplots` is True.\n",
"\n",
" See Also\n",
" --------\n",
" Series.plot.pie : Generate a pie plot for a Series.\n",
" DataFrame.plot : Make plots of a DataFrame.\n",
"\n",
" Examples\n",
" --------\n",
" In the example below we have a DataFrame with the information about\n",
" planet's mass and radius. We pass the 'mass' column to the\n",
" pie function to get a pie plot.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],\n",
" ... 'radius': [2439.7, 6051.8, 6378.1]},\n",
" ... index=['Mercury', 'Venus', 'Earth'])\n",
" >>> plot = df.plot.pie(y='mass', figsize=(5, 5))\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))\n",
" \"\"\"\n",
" if (\n",
" isinstance(self._parent, ABCDataFrame)\n",
" and kwargs.get(\"y\", None) is None\n",
" and not kwargs.get(\"subplots\", False)\n",
" ):\n",
" raise ValueError(\"pie requires either y column or 'subplots=True'\")\n",
" return self(kind=\"pie\", **kwargs)\n",
"\n",
" def scatter(\n",
" self,\n",
" x: Hashable,\n",
" y: Hashable,\n",
" s: Hashable | Sequence[Hashable] | None = None,\n",
" c: Hashable | Sequence[Hashable] | None = None,\n",
" **kwargs,\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Create a scatter plot with varying marker point size and color.\n",
"\n",
" The coordinates of each point are defined by two dataframe columns and\n",
" filled circles are used to represent each point. This kind of plot is\n",
" useful to see complex correlations between two variables. Points could\n",
" be for instance natural 2D coordinates like longitude and latitude in\n",
" a map or, in general, any pair of metrics that can be plotted against\n",
" each other.\n",
"\n",
" Parameters\n",
" ----------\n",
" x : int or str\n",
" The column name or column position to be used as horizontal\n",
" coordinates for each point.\n",
" y : int or str\n",
" The column name or column position to be used as vertical\n",
" coordinates for each point.\n",
" s : str, scalar or array-like, optional\n",
" The size of each point. Possible values are:\n",
"\n",
" - A string with the name of the column to be used for marker's size.\n",
"\n",
" - A single scalar so all points have the same size.\n",
"\n",
" - A sequence of scalars, which will be used for each point's size\n",
" recursively. For instance, when passing [2,14] all points size\n",
" will be either 2 or 14, alternatively.\n",
"\n",
" c : str, int or array-like, optional\n",
" The color of each point. Possible values are:\n",
"\n",
" - A single color string referred to by name, RGB or RGBA code,\n",
" for instance 'red' or '#a98d19'.\n",
"\n",
" - A sequence of color strings referred to by name, RGB or RGBA\n",
" code, which will be used for each point's color recursively. For\n",
" instance ['green','yellow'] all points will be filled in green or\n",
" yellow, alternatively.\n",
"\n",
" - A column name or position whose values will be used to color the\n",
" marker points according to a colormap.\n",
"\n",
" **kwargs\n",
" Keyword arguments to pass on to :meth:`DataFrame.plot`.\n",
"\n",
" Returns\n",
" -------\n",
" :class:`matplotlib.axes.Axes` or numpy.ndarray of them\n",
"\n",
" See Also\n",
" --------\n",
" matplotlib.pyplot.scatter : Scatter plot using multiple input data\n",
" formats.\n",
"\n",
" Examples\n",
" --------\n",
" Let's see how to draw a scatter plot using coordinates from the values\n",
" in a DataFrame's columns.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],\n",
" ... [6.4, 3.2, 1], [5.9, 3.0, 2]],\n",
" ... columns=['length', 'width', 'species'])\n",
" >>> ax1 = df.plot.scatter(x='length',\n",
" ... y='width',\n",
" ... c='DarkBlue')\n",
"\n",
" And now with the color determined by a column as well.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> ax2 = df.plot.scatter(x='length',\n",
" ... y='width',\n",
" ... c='species',\n",
" ... colormap='viridis')\n",
" \"\"\"\n",
" return self(kind=\"scatter\", x=x, y=y, s=s, c=c, **kwargs)\n",
"\n",
" def hexbin(\n",
" self,\n",
" x: Hashable,\n",
" y: Hashable,\n",
" C: Hashable | None = None,\n",
" reduce_C_function: Callable | None = None,\n",
" gridsize: int | tuple[int, int] | None = None,\n",
" **kwargs,\n",
" ) -> PlotAccessor:\n",
" \"\"\"\n",
" Generate a hexagonal binning plot.\n",
"\n",
" Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`\n",
" (the default), this is a histogram of the number of occurrences\n",
" of the observations at ``(x[i], y[i])``.\n",
"\n",
" If `C` is specified, specifies values at given coordinates\n",
" ``(x[i], y[i])``. These values are accumulated for each hexagonal\n",
" bin and then reduced according to `reduce_C_function`,\n",
" having as default the NumPy's mean function (:meth:`numpy.mean`).\n",
" (If `C` is specified, it must also be a 1-D sequence\n",
" of the same length as `x` and `y`, or a column label.)\n",
"\n",
" Parameters\n",
" ----------\n",
" x : int or str\n",
" The column label or position for x points.\n",
" y : int or str\n",
" The column label or position for y points.\n",
" C : int or str, optional\n",
" The column label or position for the value of `(x, y)` point.\n",
" reduce_C_function : callable, default `np.mean`\n",
" Function of one argument that reduces all the values in a bin to\n",
" a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).\n",
" gridsize : int or tuple of (int, int), default 100\n",
" The number of hexagons in the x-direction.\n",
" The corresponding number of hexagons in the y-direction is\n",
" chosen in a way that the hexagons are approximately regular.\n",
" Alternatively, gridsize can be a tuple with two elements\n",
" specifying the number of hexagons in the x-direction and the\n",
" y-direction.\n",
" **kwargs\n",
" Additional keyword arguments are documented in\n",
" :meth:`DataFrame.plot`.\n",
"\n",
" Returns\n",
" -------\n",
" matplotlib.AxesSubplot\n",
" The matplotlib ``Axes`` on which the hexbin is plotted.\n",
"\n",
" See Also\n",
" --------\n",
" DataFrame.plot : Make plots of a DataFrame.\n",
" matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,\n",
" the matplotlib function that is used under the hood.\n",
"\n",
" Examples\n",
" --------\n",
" The following examples are generated with random data from\n",
" a normal distribution.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> n = 10000\n",
" >>> df = pd.DataFrame({'x': np.random.randn(n),\n",
" ... 'y': np.random.randn(n)})\n",
" >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)\n",
"\n",
" The next example uses `C` and `np.sum` as `reduce_C_function`.\n",
" Note that `'observations'` values ranges from 1 to 5 but the result\n",
" plot shows values up to more than 25. This is because of the\n",
" `reduce_C_function`.\n",
"\n",
" .. plot::\n",
" :context: close-figs\n",
"\n",
" >>> n = 500\n",
" >>> df = pd.DataFrame({\n",
" ... 'coord_x': np.random.uniform(-3, 3, size=n),\n",
" ... 'coord_y': np.random.uniform(30, 50, size=n),\n",
" ... 'observations': np.random.randint(1,5, size=n)\n",
" ... })\n",
" >>> ax = df.plot.hexbin(x='coord_x',\n",
" ... y='coord_y',\n",
" ... C='observations',\n",
" ... reduce_C_function=np.sum,\n",
" ... gridsize=10,\n",
" ... cmap=\"viridis\")\n",
" \"\"\"\n",
" if reduce_C_function is not None:\n",
" kwargs[\"reduce_C_function\"] = reduce_C_function\n",
" if gridsize is not None:\n",
" kwargs[\"gridsize\"] = gridsize\n",
"\n",
" return self(kind=\"hexbin\", x=x, y=y, C=C, **kwargs)\n",
"\n"
]
}
],
"source": [
"psource(Cdf.plot)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"def decorate_dice(title):\n",
" \"\"\"Labels the axes.\n",
" \n",
" title: string\n",
" \"\"\"\n",
" plt.xlabel('Outcome')\n",
" plt.ylabel('CDF')\n",
" plt.title(title)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"