Skip to content

Families Component

This chapter provides implementation details for each distribution family in RustyStats.

Code Location

crates/rustystats-core/src/families/
├── mod.rs              # Family trait definition
├── gaussian.rs         # Gaussian (Normal)
├── poisson.rs          # Poisson
├── binomial.rs         # Binomial
├── gamma.rs            # Gamma
├── tweedie.rs          # Tweedie
├── quasi.rs            # QuasiPoisson, QuasiBinomial
└── negative_binomial.rs # Negative Binomial

The Family Trait

Every family must implement:

pub trait Family: Send + Sync {
    /// Display name
    fn name(&self) -> &str;

    /// Variance function V(μ)
    fn variance(&self, mu: &Array1<f64>) -> Array1<f64>;

    /// Unit deviance d(y, μ) per observation
    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64>;

    /// Total deviance (sum of weighted unit deviances)
    fn deviance(&self, y: &Array1<f64>, mu: &Array1<f64>, 
                weights: Option<&Array1<f64>>) -> f64;

    /// Canonical link function
    fn default_link(&self) -> Box<dyn Link>;

    /// Starting values for μ
    fn initialize_mu(&self, y: &Array1<f64>) -> Array1<f64>;

    /// Check if μ values are valid
    fn is_valid_mu(&self, mu: &Array1<f64>) -> bool;
}

Gaussian Family

File: gaussian.rs

Properties

Property Value
Variance \(V(\mu) = 1\)
Deviance \(d(y, \mu) = (y - \mu)^2\)
Canonical link Identity
Valid μ range \((-\infty, +\infty)\)

Implementation

pub struct GaussianFamily;

impl Family for GaussianFamily {
    fn name(&self) -> &str { "Gaussian" }

    fn variance(&self, mu: &Array1<f64>) -> Array1<f64> {
        Array1::ones(mu.len())  // Constant variance
    }

    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64> {
        (y - mu).mapv(|r| r * r)  // (y - μ)²
    }

    fn default_link(&self) -> Box<dyn Link> {
        Box::new(IdentityLink)
    }

    fn initialize_mu(&self, y: &Array1<f64>) -> Array1<f64> {
        y.clone()  // Start at observed values
    }

    fn is_valid_mu(&self, _mu: &Array1<f64>) -> bool {
        true  // Any real number is valid
    }
}

Poisson Family

File: poisson.rs

Properties

Property Value
Variance \(V(\mu) = \mu\)
Deviance \(d(y, \mu) = 2[y \log(y/\mu) - (y - \mu)]\)
Canonical link Log
Valid μ range \((0, +\infty)\)

Implementation

pub struct PoissonFamily;

impl Family for PoissonFamily {
    fn name(&self) -> &str { "Poisson" }

    fn variance(&self, mu: &Array1<f64>) -> Array1<f64> {
        mu.clone()  // V(μ) = μ
    }

    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64> {
        Zip::from(y).and(mu).map_collect(|&yi, &mui| {
            let mui_safe = mui.max(1e-10);
            if yi > 0.0 {
                2.0 * (yi * (yi / mui_safe).ln() - (yi - mui_safe))
            } else {
                2.0 * mui_safe  // Limit as y → 0
            }
        })
    }

    fn default_link(&self) -> Box<dyn Link> {
        Box::new(LogLink)
    }

    fn initialize_mu(&self, y: &Array1<f64>) -> Array1<f64> {
        y.mapv(|yi| (yi + 0.1).max(0.1))  // Avoid log(0)
    }

    fn is_valid_mu(&self, mu: &Array1<f64>) -> bool {
        mu.iter().all(|&m| m > 0.0)
    }
}

Note: Handling y = 0

When \(y = 0\), the term \(y \log(y/\mu)\) requires special handling:

\[ \lim_{y \to 0} y \log(y/\mu) = 0 \]

So the unit deviance simplifies to \(2\mu\).

Binomial Family

File: binomial.rs

Properties

Property Value
Variance \(V(\mu) = \mu(1-\mu)\)
Deviance \(d(y, \mu) = 2[y \log(y/\mu) + (1-y)\log((1-y)/(1-\mu))]\)
Canonical link Logit
Valid μ range \((0, 1)\)

Implementation

pub struct BinomialFamily;

impl Family for BinomialFamily {
    fn name(&self) -> &str { "Binomial" }

    fn variance(&self, mu: &Array1<f64>) -> Array1<f64> {
        mu.mapv(|m| {
            let m_safe = m.clamp(1e-10, 1.0 - 1e-10);
            m_safe * (1.0 - m_safe)
        })
    }

    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64> {
        Zip::from(y).and(mu).map_collect(|&yi, &mui| {
            let mui_safe = mui.clamp(1e-10, 1.0 - 1e-10);
            let yi_safe = yi.clamp(1e-10, 1.0 - 1e-10);

            2.0 * (yi_safe * (yi_safe / mui_safe).ln() 
                   + (1.0 - yi_safe) * ((1.0 - yi_safe) / (1.0 - mui_safe)).ln())
        })
    }

    fn default_link(&self) -> Box<dyn Link> {
        Box::new(LogitLink)
    }

    fn initialize_mu(&self, y: &Array1<f64>) -> Array1<f64> {
        y.mapv(|yi| (yi + 0.5) / 2.0)  // Shrink toward 0.5
    }

    fn is_valid_mu(&self, mu: &Array1<f64>) -> bool {
        mu.iter().all(|&m| m > 0.0 && m < 1.0)
    }
}

Gamma Family

File: gamma.rs

Properties

Property Value
Variance \(V(\mu) = \mu^2\)
Deviance \(d(y, \mu) = 2[-\log(y/\mu) + (y-\mu)/\mu]\)
Canonical link Inverse (\(-1/\mu\))
Common link Log (used in RustyStats)
Valid μ range \((0, +\infty)\)

Implementation

pub struct GammaFamily;

impl Family for GammaFamily {
    fn name(&self) -> &str { "Gamma" }

    fn variance(&self, mu: &Array1<f64>) -> Array1<f64> {
        mu.mapv(|m| m * m)  // V(μ) = μ²
    }

    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64> {
        Zip::from(y).and(mu).map_collect(|&yi, &mui| {
            let mui_safe = mui.max(1e-10);
            let yi_safe = yi.max(1e-10);
            2.0 * (-(yi_safe / mui_safe).ln() + (yi_safe - mui_safe) / mui_safe)
        })
    }

    fn default_link(&self) -> Box<dyn Link> {
        Box::new(LogLink)  // Log link, not canonical inverse
    }

    fn initialize_mu(&self, y: &Array1<f64>) -> Array1<f64> {
        y.mapv(|yi| yi.max(0.1))
    }

    fn is_valid_mu(&self, mu: &Array1<f64>) -> bool {
        mu.iter().all(|&m| m > 0.0)
    }
}

Tweedie Family

File: tweedie.rs

Properties

Property Value
Variance \(V(\mu) = \mu^p\) where \(p\) is variance power
Valid \(p\) \(p \leq 0\) or \(p \geq 1\)
Valid μ range \((0, +\infty)\) for \(p > 0\)

Implementation

pub struct TweedieFamily {
    pub var_power: f64,
}

impl TweedieFamily {
    pub fn new(var_power: f64) -> Self {
        assert!(var_power <= 0.0 || var_power >= 1.0,
                "var_power must be <= 0 or >= 1");
        Self { var_power }
    }
}

impl Family for TweedieFamily {
    fn name(&self) -> &str { "Tweedie" }

    fn variance(&self, mu: &Array1<f64>) -> Array1<f64> {
        let p = self.var_power;
        mu.mapv(|m| m.powf(p))
    }

    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64> {
        let p = self.var_power;

        Zip::from(y).and(mu).map_collect(|&yi, &mui| {
            let mui_safe = mui.max(1e-10);

            if (p - 1.0).abs() < 1e-10 {
                // p = 1: Poisson limit
                2.0 * (yi * (yi / mui_safe).ln() - (yi - mui_safe))
            } else if (p - 2.0).abs() < 1e-10 {
                // p = 2: Gamma
                2.0 * (-(yi / mui_safe).ln() + (yi - mui_safe) / mui_safe)
            } else {
                // General case
                let term1 = yi.powf(2.0 - p) / ((1.0 - p) * (2.0 - p));
                let term2 = yi * mui_safe.powf(1.0 - p) / (1.0 - p);
                let term3 = mui_safe.powf(2.0 - p) / (2.0 - p);
                2.0 * (term1 - term2 + term3)
            }
        })
    }

    fn default_link(&self) -> Box<dyn Link> {
        Box::new(LogLink)
    }

    // ...
}

Quasi-Families

File: quasi.rs

Quasi-families estimate dispersion from data instead of fixing it at 1.

QuasiPoisson

pub struct QuasiPoissonFamily;

impl Family for QuasiPoissonFamily {
    fn name(&self) -> &str { "QuasiPoisson" }

    fn variance(&self, mu: &Array1<f64>) -> Array1<f64> {
        mu.clone()  // Same as Poisson
    }

    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64> {
        // Same deviance formula as Poisson
        // Dispersion is estimated separately
        // ...
    }

    // Same initialization and validation as Poisson
}

The key difference is in how dispersion is handled in PyGLMResults:

fn scale(&self) -> f64 {
    match self.family_name.as_str() {
        "Poisson" | "Binomial" => 1.0,  // Fixed
        "QuasiPoisson" | "QuasiBinomial" => {
            // Estimate from Pearson residuals
            estimate_dispersion_pearson(...)
        }
        _ => self.deviance / self.df_resid() as f64,
    }
}

Negative Binomial Family

File: negative_binomial.rs

Properties

Property Value
Variance \(V(\mu) = \mu + \mu^2/\theta\)
Parameter \(\theta > 0\) controls overdispersion
Valid μ range \((0, +\infty)\)

Implementation

pub struct NegativeBinomialFamily {
    pub theta: f64,
}

impl NegativeBinomialFamily {
    pub fn new(theta: f64) -> Self {
        assert!(theta > 0.0, "theta must be positive");
        Self { theta }
    }

    pub fn alpha(&self) -> f64 {
        1.0 / self.theta  // Alternative parameterization
    }
}

impl Family for NegativeBinomialFamily {
    fn name(&self) -> &str { "NegativeBinomial" }

    fn variance(&self, mu: &Array1<f64>) -> Array1<f64> {
        let theta = self.theta;
        mu.mapv(|m| m + m * m / theta)
    }

    fn unit_deviance(&self, y: &Array1<f64>, mu: &Array1<f64>) -> Array1<f64> {
        let theta = self.theta;

        Zip::from(y).and(mu).map_collect(|&yi, &mui| {
            let mui_safe = mui.max(1e-10);

            let term1 = if yi > 0.0 {
                2.0 * yi * (yi / mui_safe).ln()
            } else {
                0.0
            };

            let term2 = 2.0 * (yi + theta) * 
                ((yi + theta) / (mui_safe + theta)).ln();

            term1 - term2
        })
    }

    fn default_link(&self) -> Box<dyn Link> {
        Box::new(LogLink)
    }

    // ...
}

Theta Estimation

RustyStats can automatically estimate θ:

// In diagnostics module
pub fn estimate_theta_moments(
    y: &Array1<f64>,
    mu: &Array1<f64>,
) -> f64 {
    // Method of moments estimator
    let n = y.len() as f64;
    let mean = mu.mean().unwrap();

    let var = y.iter().zip(mu.iter())
        .map(|(yi, mui)| (yi - mui).powi(2) / mui)
        .sum::<f64>() / n;

    // Var = μ + μ²/θ  →  θ = μ² / (Var - μ)
    let excess_var = (var - 1.0).max(0.01);
    mean / excess_var
}

Testing Families

Each family has unit tests:

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    #[test]
    fn test_poisson_variance() {
        let family = PoissonFamily;
        let mu = array![1.0, 2.0, 5.0];
        let var = family.variance(&mu);

        assert_eq!(var, mu);  // V(μ) = μ
    }

    #[test]
    fn test_poisson_deviance_zero() {
        let family = PoissonFamily;
        let y = array![0.0];
        let mu = array![1.0];
        let dev = family.unit_deviance(&y, &mu);

        assert_relative_eq!(dev[0], 2.0, epsilon = 1e-10);
    }

    #[test]
    fn test_binomial_variance_bounds() {
        let family = BinomialFamily;
        let mu = array![0.0001, 0.5, 0.9999];
        let var = family.variance(&mu);

        // All variances should be positive and bounded
        for v in var.iter() {
            assert!(*v > 0.0 && *v <= 0.25);
        }
    }
}