Quick Start¶
Comparing Two Distributions¶
import numpy as np
from divergence import (
    kl_divergence,
    energy_distance,
    jensen_shannon_divergence,
    two_sample_test,
)
rng = np.random.default_rng(42)
p = rng.normal(0, 1, 3000)
q = rng.normal(0.5, 1.2, 3000)
# KL divergence (auto-dispatches to continuous KDE estimator)
kl = kl_divergence(p, q)
# Energy distance (no hyperparameters)
ed = energy_distance(p, q)
# Jensen-Shannon divergence (symmetric, bounded)
jsd = jensen_shannon_divergence(p, q)
# Formal hypothesis test: H0: P = Q
result = two_sample_test(p, q, method="energy", n_permutations=500, seed=42)
print(f"Reject H0? {result.p_value < 0.05} (p = {result.p_value:.4f})")
Measuring Entropy¶
from divergence import entropy, knn_entropy
# KDE-based entropy (1D) — short alias for entropy_from_samples
h_kde = entropy(p)
# kNN-based entropy (scales to high dimensions)
h_knn = knn_entropy(p, k=5)
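For intuition, the classic Kozachenko-Leonenko estimator that underlies kNN entropy methods fits in a few lines of scipy. This is a sketch of the general technique, not necessarily the exact estimator knn_entropy implements:

from scipy.spatial import cKDTree
from scipy.special import digamma, gammaln

def kl_entropy_sketch(x, k=5):
    """Kozachenko-Leonenko kNN entropy estimate, in nats (sketch)."""
    x = np.asarray(x, dtype=float).reshape(len(x), -1)
    n, d = x.shape
    # Distance from each point to its k-th nearest neighbor
    # (query k + 1 neighbors because the nearest one is the point itself).
    r = cKDTree(x).query(x, k=k + 1)[0][:, -1]
    # Log-volume of the d-dimensional unit ball.
    log_vd = (d / 2) * np.log(np.pi) - gammaln(d / 2 + 1)
    return digamma(n) - digamma(k) + log_vd + d * np.mean(np.log(r))

# For a standard normal, the true entropy is 0.5 * log(2 * pi * e) ≈ 1.419.
print(kl_entropy_sketch(p, k=5))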
Discrete Distributions¶
from divergence import discrete_entropy, discrete_mutual_information
labels_x = rng.integers(0, 5, 1000)
labels_y = rng.integers(0, 5, 1000)
h = discrete_entropy(labels_x, base=2) # in bits
mi = discrete_mutual_information(labels_x, labels_y, base=2)
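Both quantities are simple functions of empirical counts. As a cross-check, the plug-in (maximum-likelihood) estimates in plain numpy; the library's estimators may additionally apply bias corrections:

# Plug-in entropy: H(X) = -sum_x p(x) log2 p(x)
counts = np.bincount(labels_x)
px = counts[counts > 0] / counts.sum()
h_manual = -np.sum(px * np.log2(px))

# Plug-in mutual information from the empirical joint distribution:
# I(X; Y) = sum_{x,y} p(x,y) log2[ p(x,y) / (p(x) p(y)) ]
joint = np.zeros((5, 5))
np.add.at(joint, (labels_x, labels_y), 1)
pxy = joint / joint.sum()
prod = np.outer(pxy.sum(axis=1), pxy.sum(axis=0))
nz = pxy > 0
mi_manual = np.sum(pxy[nz] * np.log2(pxy[nz] / prod[nz]))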
Bayesian Diagnostics (ArviZ)¶
import arviz as az
from divergence import information_gain, chain_divergence
# After running MCMC...
idata = az.from_dict(
    posterior={"mu": rng.normal(5, 0.5, (4, 1000))},
    prior={"mu": rng.normal(0, 10, (4, 1000))},
)
# How much did the data update our beliefs?
ig = information_gain(idata)
# Are the chains sampling the same distribution?
cd = chain_divergence(idata)
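Conceptually, information gain measures how far the posterior has moved from the prior, and the chain check asks whether every chain drew from the same distribution. Both questions can be probed by hand with the estimators from earlier sections; a rough illustration under those readings, not necessarily what information_gain or chain_divergence compute internally:

# Information gain read as KL(posterior || prior) on the pooled draws.
post_draws = idata.posterior["mu"].values.ravel()
prior_draws = idata.prior["mu"].values.ravel()
print(kl_divergence(post_draws, prior_draws))

# Pairwise energy distances between chains; values near zero suggest
# all chains explored the same posterior.
draws = idata.posterior["mu"].values  # ndarray of shape (chain, draw)
n_chains = draws.shape[0]
for i in range(n_chains):
    for j in range(i + 1, n_chains):
        print(f"chains {i} vs {j}: {energy_distance(draws[i], draws[j]):.4f}")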
Next Steps¶
Explore the tutorials for in-depth coverage with visualizations and historical context, or dive into the API reference for the full function catalog.