latmath.stats — Statistics
Descriptive statistics, histogram binning, moments, and probability distributions — all built on Python’s standard library with no SciPy dependency.
Rationale. By relying only on the standard-library math and statistics modules (including math.erf, available since Python 3.2), latmath.stats avoids heavyweight third-party dependencies. This keeps the library lightweight, installable without the compiler toolchain that building SciPy can require, and consistent across platforms. For advanced statistical needs, users can convert to NumPy/SciPy arrays at the boundary.
Descriptive Statistics
from latpy.latmath.stats import describe
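| Signature | Description |
|---|---|
| `describe(arr)` | Return a `dict` of summary statistics with keys `count`, `mean`, `std`, `min`, `25%`, `50%`, `75%`, `max` |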
describe computes a five-number summary plus count, mean, and sample standard deviation (ddof=1). The 25th, 50th, and 75th percentiles use linear interpolation (type R7, consistent with NumPy’s default).
Examples
from latpy.latmath.array import array
from latpy.latmath.stats import describe
# Standard usage
d = describe(array([1, 2, 3, 4, 5]))
# {
# 'count': 5,
# 'mean': 3.0,
# 'std': 1.5811388300841898,
# 'min': 1,
# '25%': 2.0,
# '50%': 3.0,
# '75%': 4.0,
# 'max': 5
# }
# Single element
describe(array([42]))
# {'count': 1, 'mean': 42.0, 'std': 0.0, 'min': 42,
# '25%': 42.0, '50%': 42.0, '75%': 42.0, 'max': 42}
# Edge: empty array
describe(array([]))
# {'count': 0, 'mean': nan, 'std': nan, 'min': nan,
# '25%': nan, '50%': nan, '75%': nan, 'max': nan}
# Edge: two elements (quartiles are linearly interpolated between them)
describe(array([0, 10]))
# {'count': 2, 'mean': 5.0, 'std': 7.071..., 'min': 0,
# '25%': 2.5, '50%': 5.0, '75%': 7.5, 'max': 10}
Histogram
from latpy.latmath.stats import histogram
| Signature | Description |
|---|---|
| `histogram(arr, bins=..., range_=None)` | Bin values into equal-width bins; returns NDArray of counts and NDArray of bin edges |
If range_ is None, the range is inferred from min(arr) to max(arr). If provided, values outside range_ are excluded.
Examples
from latpy.latmath.array import array
from latpy.latmath.stats import histogram
# Basic usage
counts, edges = histogram(array([1, 1, 2, 3, 3, 3]), bins=3, range_=(1, 4))
# counts: NDArray([2, 1, 3])
# edges: NDArray([1.0, 2.0, 3.0, 4.0])
# Inferred range (range_ omitted): edges span min(arr) to max(arr)
counts, edges = histogram(array([0, 1, 2, 3, 4, 5]), bins=5)
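# counts: NDArray([1, 1, 1, 1, 1])
# NOTE: only 5 of the 6 values are counted. This is correct only if the last bin is
# half-open, [4.0, 5.0), so the maximum (5) is dropped; if the last bin is closed
# (as in NumPy) the result would be NDArray([1, 1, 1, 1, 2]). TODO: confirm.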
# edges: NDArray([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
# Edge: data outside range_ is excluded
counts, edges = histogram(array([-10, 1, 2, 50]), bins=3, range_=(0, 10))
# Only 1 and 2 fall inside [0, 10); -10 and 50 are dropped.
# counts: NDArray([2, 0, 0]) # bin [0, 3.33): 2 (values 1 and 2), [3.33, 6.67): 0, [6.67, 10): 0
# edges: NDArray([0.0, 3.333..., 6.666..., 10.0])
# Edge: empty histogram (no data in range)
counts, edges = histogram(array([1, 2, 3]), range_=(10, 20))
# counts: NDArray([0]) (one bin, and no values fall inside range_)
# edges: NDArray([10.0, 20.0])
Moments
from latpy.latmath.stats import skew, kurtosis
| Signature | Description |
|---|---|
| `skew(arr)` | Sample skewness (third standardized moment) |
| `kurtosis(arr)` | Excess kurtosis (fourth standardized moment - 3) |
Interpreting skewness:
skew ≈ 0: Symmetric distribution (e.g., normal).
skew > 0: Right-skewed (long tail on the right; mean > median).
skew < 0: Left-skewed (long tail on the left; mean < median).
Interpreting excess kurtosis:
kurtosis ≈ 0: Mesokurtic (normal-like tail weight).
kurtosis > 0: Leptokurtic (heavy tails, sharp peak; more outliers).
kurtosis < 0: Platykurtic (light tails, flat peak; fewer outliers).
skew requires at least 3 samples and kurtosis at least 4; with fewer samples, or with constant (zero-variance) data, they raise StatisticsError (from Python’s stdlib statistics module).
Examples
from latpy.latmath.array import array
from latpy.latmath.stats import skew, kurtosis
# Symmetric, evenly spaced data (skew = 0; kurtosis < 0, i.e. platykurtic rather than normal-like)
data = array([-2, -1, 0, 1, 2])
skew(data) # 0.0
kurtosis(data) # -1.2
# Right-skewed data
right = array([1, 2, 2, 3, 3, 3, 10])
skew(right) # positive (~2.3 with the bias-corrected sample estimator; long right tail)
# Left-skewed data
left = array([-10, -3, -3, -2, -2, -2, -1])
skew(left) # negative (~-2.3 with the bias-corrected sample estimator; long left tail)
# Edge: skew with fewer than 3 samples
skew(array([1, 2])) # StatisticsError: at least 3 samples required
skew(array([1])) # StatisticsError
skew(array([])) # StatisticsError
# Edge: kurtosis with fewer than 4 samples
kurtosis(array([1, 2, 3])) # StatisticsError: at least 4 samples required
kurtosis(array([1, 2])) # StatisticsError
kurtosis(array([])) # StatisticsError
# Edge: constant data (variance = 0)
skew(array([5, 5, 5, 5])) # StatisticsError (zero variance)
kurtosis(array([5, 5, 5, 5, 5])) # StatisticsError (zero variance)
Probability Functions
from latpy.latmath.stats import norm_pdf, norm_cdf, poisson_pmf, uniform_pdf
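| Signature | Description |
|---|---|
| `norm_pdf(x, loc=0.0, scale=1.0)` | Normal PDF |
| `norm_cdf(x, loc=0.0, scale=1.0)` | Normal CDF (via `math.erf`) |
| `poisson_pmf(k, lam)` | Poisson PMF |
| `uniform_pdf(x, lo=0.0, hi=1.0)` | Uniform PDF |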
Examples
from latpy.latmath.stats import norm_pdf, norm_cdf, poisson_pmf, uniform_pdf
# Normal PDF (standard normal)
norm_pdf(0.0) # 0.3989422804014327 (peak of standard normal)
norm_pdf(1.96) # 0.058445...
norm_pdf(-2.5) # 0.017528...
# Normal CDF (via math.erf)
norm_cdf(0.0) # 0.5
norm_cdf(1.96) # 0.975002... (roughly 97.5th percentile)
norm_cdf(-1.96) # 0.024997...
# Edge: extreme values of norm_cdf
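norm_cdf(-10) # ~7.62e-24 (effectively 0). NOTE: a plain 0.5 * (1 + math.erf(z / sqrt(2))) rounds to exactly 0.0 here; this value requires a math.erfc-based lower tail. TODO: confirm.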
norm_cdf(-100) # 0.0 (floating-point underflow)
norm_cdf(10) # 1.0 (1 - 7.62e-24 is not representable in double precision, so the result is exactly 1.0)
norm_cdf(100) # 1.0
# Non-standard parameters
norm_pdf(0.0, loc=5.0, scale=2.0) # 0.008764... (far from center)
norm_cdf(7.0, loc=5.0, scale=2.0) # 0.84134... (1 std above mean)
# Poisson PMF
poisson_pmf(0, lam=1) # 0.367879... (e^{-1})
poisson_pmf(3, lam=5) # 0.140373...
poisson_pmf(10, lam=5) # 0.018132...
# Edge: lam = 0 (zero-rate Poisson = point mass at 0)
poisson_pmf(0, lam=0) # 1.0
poisson_pmf(1, lam=0) # 0.0
poisson_pmf(42, lam=0) # 0.0
# Uniform PDF
uniform_pdf(0.5) # 1.0 (density of U[0,1) is 1)
uniform_pdf(-0.1) # 0.0 (outside support)
uniform_pdf(1.5) # 0.0 (outside support)
uniform_pdf(0.5, lo=2, hi=5) # 0.0 (outside [2,5))
uniform_pdf(3.0, lo=2, hi=5) # 0.333... (density = 1/(5-2))