Chile Earthquake Magnitudes

Chile Earthquake Magnitudes#

Exploring the distribution of earthquake magnitudes in Chile using the relocated catalogue from the Centro Sismológico Nacional (CSN), 1982–mid-2020.

Potin et al. (2024), doi:10.1785/0220240047

# Install empiricaldist if we don't already have it

try:
    import empiricaldist
except ImportError:
    !pip install empiricaldist

from os.path import basename, exists


def download(url):
    filename = basename(url)
    if not exists(filename):
        from urllib.request import urlretrieve

        local, _ = urlretrieve(url, filename)
        print("Downloaded " + local)


download(
    "https://github.com/AllenDowney/ProbablyOverthinkingIt/raw/book/notebooks/utils.py"
)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from empiricaldist import Cdf, Surv
from utils import decorate, set_pyplot_params, make_surv, plot_error_bounds

set_pyplot_params()

Load the catalogue#

Data from Potin et al. (2024): a revised, relocated Chilean seismic catalogue covering 1982 to mid-2020. See chile/chilean_seismic_catalogue-1982-2020-publication_release/README.md for column definitions and citation details.

DATA_DIR = "chile/chilean_seismic_catalogue-1982-2020-publication_release"
SEISMICITY_PATH = f"{DATA_DIR}/CHILE_SEISMICITY_RELOCATED.csv"

data = pd.read_csv(SEISMICITY_PATH)
data["magnitude_type"] = data["magnitude_type"].str.strip()
data.shape

(118004, 13)

data.head()

	#	year	month	day	hour	minute	seconds	longitude	latitude	depth	rms	magnitude	magnitude_type
0	0	1982	1	8	0	32	52.05	-71.760237	-32.218361	11.7341	0.95	NaN	xx
1	1	1982	1	9	1	6	18.81	-73.147276	-33.919849	5.3619	0.86	NaN	xx
2	2	1982	1	9	20	28	47.24	-70.053573	-32.952820	118.5877	0.27	NaN	xx
3	3	1982	1	12	15	4	33.20	-70.431024	-32.467427	89.8999	0.08	NaN	xx
4	4	1982	1	13	11	20	3.02	-69.627611	-31.559731	46.6019	1.49	NaN	xx

data.describe(include="all")

	#	year	month	day	hour	minute	seconds	longitude	latitude	depth	rms	magnitude	magnitude_type
count	118004.00000	118004.000000	118004.000000	118004.000000	118004.000000	118004.000000	118004.000000	118004.000000	118004.000000	118004.000000	118004.000000	109310.000000	118004
unique	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	7
top	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	l
freq	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	89168
mean	59001.50000	2008.268915	6.537863	15.628521	11.320065	29.394961	30.028944	-70.540887	-29.325809	68.141672	0.357229	3.177608	NaN
std	34064.96492	9.381915	3.453024	8.782564	6.920552	17.304747	17.315109	1.531607	5.681583	56.104951	0.195222	0.792424	NaN
min	0.00000	1982.000000	1.000000	1.000000	0.000000	0.000000	0.000000	-76.229448	-45.979579	-4.954000	0.010000	0.200000	NaN
25%	29500.75000	2003.000000	4.000000	8.000000	5.000000	14.000000	15.050000	-71.741079	-33.416475	22.039650	0.230000	2.700000	NaN
50%	59001.50000	2010.000000	6.000000	16.000000	11.000000	29.000000	30.070000	-70.707791	-31.664178	52.116300	0.310000	3.100000	NaN
75%	88502.25000	2016.000000	10.000000	23.000000	17.000000	44.000000	45.010000	-69.277223	-23.024681	105.557100	0.440000	3.600000	NaN
max	118003.00000	2020.000000	12.000000	31.000000	23.000000	59.000000	59.990000	-64.810972	-17.817892	336.550300	2.430000	8.800000	NaN

Magnitude is reported on several scales (l local, c coda, w and ww moment magnitude). Many early events have no magnitude estimate.

data["magnitude_type"].value_counts()

magnitude_type
l     89168
c     18629
xx     8694
ww     1238
W       265
B         9
          1
Name: count, dtype: int64

no_mag = data["magnitude"].isna() | (data["magnitude_type"] == "xx")
no_mag.mean()

0.0736754686281821

Keep events with a valid magnitude for the distribution analysis.

events = data.loc[~no_mag].copy()
events.groupby("magnitude_type")["magnitude"].describe()

	count	mean	std	min	25%	50%	75%	max
magnitude_type
	1.0	4.000000	NaN	4.0	4.0	4.0	4.0	4.0
B	9.0	4.811111	0.329562	4.4	4.6	4.8	5.1	5.3
W	265.0	5.167170	1.093272	2.1	4.7	5.2	5.8	8.8
c	18629.0	3.108449	1.172345	0.2	2.8	3.5	3.8	6.4
l	89168.0	3.163719	0.653422	0.6	2.7	3.1	3.5	6.8
ww	1238.0	4.780210	0.484259	3.1	4.5	4.7	5.0	7.6

The number of events per year has been increasing, which suggests better detection.

events["year"].value_counts().sort_index().plot()
decorate(xlabel="Year", ylabel="Event count")

_images/d16c45d90d6fd1f2fa9676ba3a4cfca562019d831d8bfc6e782fe7139fd47bf5.png

for mtype, group in events.groupby("magnitude_type"):
    Cdf.from_seq(group["magnitude"]).plot(label=mtype)

decorate(xlabel="Magnitude", ylabel="CDF")
plt.legend()
plt.savefig("chile_eda_cdf.png", dpi=300)

_images/a806f910c2f13186679e3a1905d634eaeef5bfac54f19469687af60b269d9f39.png

The distributions for the different kinds of magnitudes are substantially different. I’m not sure it’s valid to combine them, but for a first pass analysis, that’s what I’m going to do.

mags = events["magnitude"]
mags.describe()

count    109310.000000
mean          3.177608
std           0.792424
min           0.200000
25%           2.700000
50%           3.100000
75%           3.600000
max           8.800000
Name: magnitude, dtype: float64

As expected, there are only a few very large magnitudes.

(mags >= 7).sum(), (mags >= 7.5).sum(), mags.max()

(11, 6, 8.8)

Here’s the tail distribution (fraction of values >= x for all x).

surv = make_surv(mags)

xlabel = "Magnitude"
ylabel = "Prob >= $x$"

surv.plot()

decorate(xlabel=xlabel, ylabel=ylabel)

_images/50f8ca619dda9deb52d5f8f166ba9939686917e0a144c4a893dc7732328ddbdf.png

Fit a Student t#

Find the df parameter that best matches the tail, then the mu and sigma parameters that best match the percentiles.

from scipy.stats import t as t_dist


def truncated_t_sf(qs, df, mu, sigma):
    ps = t_dist.sf(qs, df, mu, sigma)
    surv_model = Surv(ps / ps[0], qs)
    return surv_model

from scipy.optimize import least_squares


def fit_truncated_t(df, surv):
    """Given df, find the best values of mu and sigma."""
    low, high = surv.qs.min(), surv.qs.max()
    qs_model = np.linspace(low, high, 1000)

    # match 20 percentiles from 10th to 99th
    ps = np.linspace(0.10, 0.9, 20)
    qs = surv.inverse(ps)

    def error_func_t(params, df, surv):
        mu, sigma = params
        surv_model = truncated_t_sf(qs_model, df, mu, sigma)

        error = surv(qs) - surv_model(qs)
        return error

    params = surv.mean(), surv.std()
    res = least_squares(error_func_t, x0=params, args=(df, surv), xtol=1e-3)
    assert res.success
    return res.x

from scipy.optimize import minimize


def minimize_df(df0, surv, bounds=[(1, 1e6)], ps=None):
    low, high = surv.qs.min(), surv.qs.max()
    qs_model = np.linspace(low, high * 1.0, 2000)

    if ps is None:
        t = surv.ps[0], surv.ps[-2]
        t = 0.1, surv.ps[-2]
        low, high = np.log10(t)
        ps = np.logspace(low, high, 30, endpoint=False)

    qs = surv.inverse(ps)

    def error_func_tail(params):
        (df,) = params
        mu, sigma = fit_truncated_t(df, surv)
        surv_model = truncated_t_sf(qs_model, df, mu, sigma)

        errors = np.log10(surv(qs)) - np.log10(surv_model(qs))
        return np.sum(errors**2)

    params = (df0,)
    res = minimize(error_func_tail, x0=params, bounds=bounds, tol=1e-3, method="Powell")
    assert res.success
    return res.x

Search for the value of df that best matches the tail behavior.

df = minimize_df(5, surv, [(1, 1000)])
df

array([8.92478953])

Given that value of df, find mu and sigma to best match percentiles.

mu, sigma = fit_truncated_t(df, surv)
df, mu, sigma

(array([8.92478953]), 3.209750270962042, 0.65453719080302)

Run the model out to magnitude 9.5

low, high = surv.qs.min(), 9.5
qs = np.linspace(low, high, 2000)
surv_model = truncated_t_sf(qs, df, mu, sigma)

On a linear scale:

model_label = "$t$ model"

surv_model.plot(color="gray", alpha=0.4, label=model_label)
surv.plot(color="C1", ls="--", label="Data")

decorate(xlabel=xlabel, ylabel=ylabel)
plt.savefig("chile3.png", dpi=300)

_images/8181b37cf7dfe39c5979e14e9f7e91dd411813c02140235276d80254bcc761ab.png

The model fits the data well above magnitude 3.

Now on a log-y scale:

yticks = [1, 0.1, 0.01, 1e-3, 1e-4, 1e-5]
ylabels = yticks
xticks = np.arange(1, 10)
xlabels = xticks

surv_model.plot(color="gray", alpha=0.4, label=model_label)
plot_error_bounds(Surv(surv_model), mags.count(), color="gray", hatch="/")
surv.plot(color="C2", ls="--", label="Data")

decorate(xlabel=xlabel, ylabel=ylabel, yscale="log")
plt.xticks(xticks, xlabels)
plt.yticks(yticks, ylabels)
plt.savefig("chile4.png", dpi=300)

_images/84b267941b5ee3b2f46a0f39551661476a488889be4f6e4754f8066390b16ce3.png

And both together

def plot_two(surv, surv_model, n, hatch=None, title=""):
    """Plots tail distributions on linear-y and log-y scales.

    Uses global variables model_label, ylabel, xlabel, xticks, yticks, xlabels, ylabels.
    """
    fig, axs = plt.subplots(2, 1, figsize=(6, 5), sharex=True)

    plt.sca(axs[0])
    surv_model.plot(color="gray", alpha=0.4, label=model_label)
    if n < 1000:
        plot_error_bounds(Surv(surv_model), n, color="gray")
    surv.plot(color="C1", ls="--", label="Data")
    decorate(ylabel=ylabel, title=title)

    plt.sca(axs[1])
    surv_model.plot(color="gray", alpha=0.4, label=model_label)
    plot_error_bounds(Surv(surv_model), n, color="gray", hatch=hatch)
    surv.plot(color="C2", ls="--", label="Data")
    decorate(xlabel=xlabel, ylabel=ylabel, yscale="log")
    plt.xticks(xticks, xlabels)
    plt.yticks(yticks, ylabels)

    plt.tight_layout()
    return axs

plot_two(surv, surv_model, mags.count(), hatch="/")
plt.savefig("chile_t_model.png", dpi=300)

_images/834e2f1afd6c11a8aa6eb99e6766b51cbd25f0bb04d6fd310a5a52da57f615fe.png

The model fits the tail behavior well and makes plausible predictions for very large earthquakes.

Example related to Chapter 8 from Probably Overthinking It.

License: Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)

Chile Earthquake Magnitudes

Contents

Chile Earthquake Magnitudes#

Load the catalogue#

Fit a Student t#