Chile Earthquake Magnitudes#

Exploring the distribution of earthquake magnitudes in Chile using the relocated catalogue from the Centro Sismológico Nacional (CSN), 1982–mid-2020.

Potin et al. (2024), doi:10.1785/0220240047

# Install empiricaldist if we don't already have it

try:
    import empiricaldist
except ImportError:
    !pip install empiricaldist
from os.path import basename, exists


def download(url):
    filename = basename(url)
    if not exists(filename):
        from urllib.request import urlretrieve

        local, _ = urlretrieve(url, filename)
        print("Downloaded " + local)


download(
    "https://github.com/AllenDowney/ProbablyOverthinkingIt/raw/book/notebooks/utils.py"
)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from empiricaldist import Cdf, Surv
from utils import decorate, set_pyplot_params, make_surv, plot_error_bounds

set_pyplot_params()

Load the catalogue#

Data from Potin et al. (2024): a revised, relocated Chilean seismic catalogue covering 1982 to mid-2020. See chile/chilean_seismic_catalogue-1982-2020-publication_release/README.md for column definitions and citation details.

DATA_DIR = "chile/chilean_seismic_catalogue-1982-2020-publication_release"
SEISMICITY_PATH = f"{DATA_DIR}/CHILE_SEISMICITY_RELOCATED.csv"

data = pd.read_csv(SEISMICITY_PATH)
data["magnitude_type"] = data["magnitude_type"].str.strip()
data.shape
(118004, 13)
data.head()
# year month day hour minute seconds longitude latitude depth rms magnitude magnitude_type
0 0 1982 1 8 0 32 52.05 -71.760237 -32.218361 11.7341 0.95 NaN xx
1 1 1982 1 9 1 6 18.81 -73.147276 -33.919849 5.3619 0.86 NaN xx
2 2 1982 1 9 20 28 47.24 -70.053573 -32.952820 118.5877 0.27 NaN xx
3 3 1982 1 12 15 4 33.20 -70.431024 -32.467427 89.8999 0.08 NaN xx
4 4 1982 1 13 11 20 3.02 -69.627611 -31.559731 46.6019 1.49 NaN xx
data.describe(include="all")
# year month day hour minute seconds longitude latitude depth rms magnitude magnitude_type
count 118004.00000 118004.000000 118004.000000 118004.000000 118004.000000 118004.000000 118004.000000 118004.000000 118004.000000 118004.000000 118004.000000 109310.000000 118004
unique NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 7
top NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN l
freq NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 89168
mean 59001.50000 2008.268915 6.537863 15.628521 11.320065 29.394961 30.028944 -70.540887 -29.325809 68.141672 0.357229 3.177608 NaN
std 34064.96492 9.381915 3.453024 8.782564 6.920552 17.304747 17.315109 1.531607 5.681583 56.104951 0.195222 0.792424 NaN
min 0.00000 1982.000000 1.000000 1.000000 0.000000 0.000000 0.000000 -76.229448 -45.979579 -4.954000 0.010000 0.200000 NaN
25% 29500.75000 2003.000000 4.000000 8.000000 5.000000 14.000000 15.050000 -71.741079 -33.416475 22.039650 0.230000 2.700000 NaN
50% 59001.50000 2010.000000 6.000000 16.000000 11.000000 29.000000 30.070000 -70.707791 -31.664178 52.116300 0.310000 3.100000 NaN
75% 88502.25000 2016.000000 10.000000 23.000000 17.000000 44.000000 45.010000 -69.277223 -23.024681 105.557100 0.440000 3.600000 NaN
max 118003.00000 2020.000000 12.000000 31.000000 23.000000 59.000000 59.990000 -64.810972 -17.817892 336.550300 2.430000 8.800000 NaN

Magnitude is reported on several scales (l local, c coda, w and ww moment magnitude). Many early events have no magnitude estimate.

data["magnitude_type"].value_counts()
magnitude_type
l     89168
c     18629
xx     8694
ww     1238
W       265
B         9
          1
Name: count, dtype: int64
no_mag = data["magnitude"].isna() | (data["magnitude_type"] == "xx")
no_mag.mean()
0.0736754686281821

Keep events with a valid magnitude for the distribution analysis.

events = data.loc[~no_mag].copy()
events.groupby("magnitude_type")["magnitude"].describe()
count mean std min 25% 50% 75% max
magnitude_type
1.0 4.000000 NaN 4.0 4.0 4.0 4.0 4.0
B 9.0 4.811111 0.329562 4.4 4.6 4.8 5.1 5.3
W 265.0 5.167170 1.093272 2.1 4.7 5.2 5.8 8.8
c 18629.0 3.108449 1.172345 0.2 2.8 3.5 3.8 6.4
l 89168.0 3.163719 0.653422 0.6 2.7 3.1 3.5 6.8
ww 1238.0 4.780210 0.484259 3.1 4.5 4.7 5.0 7.6

The number of events per year has been increasing, which suggests better detection.

events["year"].value_counts().sort_index().plot()
decorate(xlabel="Year", ylabel="Event count")
_images/d16c45d90d6fd1f2fa9676ba3a4cfca562019d831d8bfc6e782fe7139fd47bf5.png
for mtype, group in events.groupby("magnitude_type"):
    Cdf.from_seq(group["magnitude"]).plot(label=mtype)

decorate(xlabel="Magnitude", ylabel="CDF")
plt.legend()
plt.savefig("chile_eda_cdf.png", dpi=300)
_images/a806f910c2f13186679e3a1905d634eaeef5bfac54f19469687af60b269d9f39.png

The distributions for the different kinds of magnitudes are substantially different. I’m not sure it’s valid to combine them, but for a first pass analysis, that’s what I’m going to do.

mags = events["magnitude"]
mags.describe()
count    109310.000000
mean          3.177608
std           0.792424
min           0.200000
25%           2.700000
50%           3.100000
75%           3.600000
max           8.800000
Name: magnitude, dtype: float64

As expected, there are only a few very large magnitudes.

(mags >= 7).sum(), (mags >= 7.5).sum(), mags.max()
(11, 6, 8.8)

Here’s the tail distribution (fraction of values >= x for all x).

surv = make_surv(mags)
xlabel = "Magnitude"
ylabel = "Prob >= $x$"
surv.plot()

decorate(xlabel=xlabel, ylabel=ylabel)
_images/50f8ca619dda9deb52d5f8f166ba9939686917e0a144c4a893dc7732328ddbdf.png

Fit a Student t#

Find the df parameter that best matches the tail, then the mu and sigma parameters that best match the percentiles.

from scipy.stats import t as t_dist


def truncated_t_sf(qs, df, mu, sigma):
    ps = t_dist.sf(qs, df, mu, sigma)
    surv_model = Surv(ps / ps[0], qs)
    return surv_model
from scipy.optimize import least_squares


def fit_truncated_t(df, surv):
    """Given df, find the best values of mu and sigma."""
    low, high = surv.qs.min(), surv.qs.max()
    qs_model = np.linspace(low, high, 1000)

    # match 20 percentiles from 10th to 99th
    ps = np.linspace(0.10, 0.9, 20)
    qs = surv.inverse(ps)

    def error_func_t(params, df, surv):
        mu, sigma = params
        surv_model = truncated_t_sf(qs_model, df, mu, sigma)

        error = surv(qs) - surv_model(qs)
        return error

    params = surv.mean(), surv.std()
    res = least_squares(error_func_t, x0=params, args=(df, surv), xtol=1e-3)
    assert res.success
    return res.x
from scipy.optimize import minimize


def minimize_df(df0, surv, bounds=[(1, 1e6)], ps=None):
    low, high = surv.qs.min(), surv.qs.max()
    qs_model = np.linspace(low, high * 1.0, 2000)

    if ps is None:
        t = surv.ps[0], surv.ps[-2]
        t = 0.1, surv.ps[-2]
        low, high = np.log10(t)
        ps = np.logspace(low, high, 30, endpoint=False)

    qs = surv.inverse(ps)

    def error_func_tail(params):
        (df,) = params
        mu, sigma = fit_truncated_t(df, surv)
        surv_model = truncated_t_sf(qs_model, df, mu, sigma)

        errors = np.log10(surv(qs)) - np.log10(surv_model(qs))
        return np.sum(errors**2)

    params = (df0,)
    res = minimize(error_func_tail, x0=params, bounds=bounds, tol=1e-3, method="Powell")
    assert res.success
    return res.x

Search for the value of df that best matches the tail behavior.

df = minimize_df(5, surv, [(1, 1000)])
df
array([8.92478953])

Given that value of df, find mu and sigma to best match percentiles.

mu, sigma = fit_truncated_t(df, surv)
df, mu, sigma
(array([8.92478953]), 3.209750270962042, 0.65453719080302)

Run the model out to magnitude 9.5

low, high = surv.qs.min(), 9.5
qs = np.linspace(low, high, 2000)
surv_model = truncated_t_sf(qs, df, mu, sigma)

On a linear scale:

model_label = "$t$ model"

surv_model.plot(color="gray", alpha=0.4, label=model_label)
surv.plot(color="C1", ls="--", label="Data")

decorate(xlabel=xlabel, ylabel=ylabel)
plt.savefig("chile3.png", dpi=300)
_images/8181b37cf7dfe39c5979e14e9f7e91dd411813c02140235276d80254bcc761ab.png

The model fits the data well above magnitude 3.

Now on a log-y scale:

yticks = [1, 0.1, 0.01, 1e-3, 1e-4, 1e-5]
ylabels = yticks
xticks = np.arange(1, 10)
xlabels = xticks
surv_model.plot(color="gray", alpha=0.4, label=model_label)
plot_error_bounds(Surv(surv_model), mags.count(), color="gray", hatch="/")
surv.plot(color="C2", ls="--", label="Data")

decorate(xlabel=xlabel, ylabel=ylabel, yscale="log")
plt.xticks(xticks, xlabels)
plt.yticks(yticks, ylabels)
plt.savefig("chile4.png", dpi=300)
_images/84b267941b5ee3b2f46a0f39551661476a488889be4f6e4754f8066390b16ce3.png

And both together

def plot_two(surv, surv_model, n, hatch=None, title=""):
    """Plots tail distributions on linear-y and log-y scales.

    Uses global variables model_label, ylabel, xlabel, xticks, yticks, xlabels, ylabels.
    """
    fig, axs = plt.subplots(2, 1, figsize=(6, 5), sharex=True)

    plt.sca(axs[0])
    surv_model.plot(color="gray", alpha=0.4, label=model_label)
    if n < 1000:
        plot_error_bounds(Surv(surv_model), n, color="gray")
    surv.plot(color="C1", ls="--", label="Data")
    decorate(ylabel=ylabel, title=title)

    plt.sca(axs[1])
    surv_model.plot(color="gray", alpha=0.4, label=model_label)
    plot_error_bounds(Surv(surv_model), n, color="gray", hatch=hatch)
    surv.plot(color="C2", ls="--", label="Data")
    decorate(xlabel=xlabel, ylabel=ylabel, yscale="log")
    plt.xticks(xticks, xlabels)
    plt.yticks(yticks, ylabels)

    plt.tight_layout()
    return axs
plot_two(surv, surv_model, mags.count(), hatch="/")
plt.savefig("chile_t_model.png", dpi=300)
_images/834e2f1afd6c11a8aa6eb99e6766b51cbd25f0bb04d6fd310a5a52da57f615fe.png

The model fits the tail behavior well and makes plausible predictions for very large earthquakes.

Copyright 2026 Allen B. Downey

Example related to Chapter 9 from Probably Overthinking It.

License: Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)