Examples

This page provides comprehensive examples of using CausalBoundingEngine in various scenarios.

Basic Examples

Example 1: Getting Started

Simple analysis with binary confounded data:

import numpy as np
from causalboundingengine.scenarios import BinaryConf

# Generate synthetic data
np.random.seed(42)
n = 1000

# Simulate confounded treatment assignment
# U is an unmeasured confounder
U = np.random.binomial(1, 0.5, n)  # Hidden confounder
X = np.random.binomial(1, 0.3 + 0.4 * U, n)  # Treatment depends on U
Y = np.random.binomial(1, 0.2 + 0.3 * X + 0.2 * U, n)  # Outcome depends on X and U

# Create scenario (U is not observed)
scenario = BinaryConf(X, Y)

# Compute bounds using different algorithms
print("=== ATE Bounds ===")
print(f"Manski:     {scenario.ATE.manski()}")
print(f"Autobound:  {scenario.ATE.autobound()}")

print("\\n=== PNS Bounds ===")
print(f"Tian-Pearl: {scenario.PNS.tianpearl()}")
print(f"Autobound:  {scenario.PNS.autobound()}")

Example 2: Instrumental Variable Analysis

Using an instrumental variable to get tighter bounds:

import numpy as np
from causalboundingengine.scenarios import BinaryIV

# Generate IV data
np.random.seed(123)
n = 500

# Valid instrument (random assignment)
Z = np.random.binomial(1, 0.5, n)

# Unmeasured confounder
U = np.random.binomial(1, 0.4, n)

# Treatment: influenced by instrument and confounder
X = np.random.binomial(1, 0.2 + 0.3 * Z + 0.3 * U, n)

# Outcome: influenced by treatment and confounder (not directly by instrument)
Y = np.random.binomial(1, 0.3 + 0.4 * X + 0.2 * U, n)

# Create IV scenario
scenario = BinaryIV(X, Y, Z)

print("=== IV Analysis ===")
print(f"Autobound ATE:  {scenario.ATE.autobound()}")
print(f"Autobound PNS:  {scenario.PNS.autobound()}")

# Compare with confounded scenario (ignore instrument)
scenario_conf = BinaryConf(X, Y)
print(f"\\nWithout IV - Manski ATE: {scenario_conf.ATE.manski()}")
print("(IV should give tighter bounds)")

Example 3: Algorithm Comparison

Systematic comparison of multiple algorithms:

import numpy as np
import pandas as pd
from causalboundingengine.scenarios import BinaryConf
import time

# Generate data
np.random.seed(42)
X = np.random.binomial(1, 0.4, 200)
Y = np.random.binomial(1, 0.3 + 0.2 * X, 200)
scenario = BinaryConf(X, Y)

# Compare algorithms
algorithms = ['manski', 'autobound']
results = []

for alg_name in algorithms:
    print(f"Running {alg_name}...")

    start_time = time.time()
    try:
        alg_func = getattr(scenario.ATE, alg_name)
        bounds = alg_func()
        success = True
        error_msg = None
    except Exception as e:
        bounds = (None, None)
        success = False
        error_msg = str(e)
    end_time = time.time()

    results.append({
        'algorithm': alg_name,
        'lower_bound': bounds[0],
        'upper_bound': bounds[1],
        'width': bounds[1] - bounds[0] if success else None,
        'time_seconds': end_time - start_time,
        'success': success,
        'error': error_msg
    })

# Display results
df = pd.DataFrame(results)
print("\\n=== Algorithm Comparison ===")
print(df.to_string(index=False))

Advanced Examples

Example 4: Sensitivity Analysis

Testing sensitivity to different assumptions using EntropyBounds:

import numpy as np
import matplotlib.pyplot as plt
from causalboundingengine.scenarios import BinaryConf

# Generate data with moderate confounding
np.random.seed(42)
n = 500
U = np.random.binomial(1, 0.5, n)
X = np.random.binomial(1, 0.3 + 0.4 * U, n)
Y = np.random.binomial(1, 0.2 + 0.3 * X + 0.3 * U, n)

scenario = BinaryConf(X, Y)

# Test different theta values (information constraints)
theta_values = [0.05, 0.1, 0.2, 0.5, 0.8, 0.95]
results = []

for theta in theta_values:
    try:
        bounds = scenario.ATE.entropybounds(theta=theta)
        results.append({
            'theta': theta,
            'lower': bounds[0],
            'upper': bounds[1],
            'width': bounds[1] - bounds[0]
        })
    except Exception as e:
        print(f"Failed for theta={theta}: {e}")

# Display results
df_sensitivity = pd.DataFrame(results)
print("=== Sensitivity Analysis ===")
print(df_sensitivity.to_string(index=False))

# Plot bounds vs theta
plt.figure(figsize=(10, 6))
plt.plot(df_sensitivity['theta'], df_sensitivity['lower'], 'bo-', label='Lower bound')
plt.plot(df_sensitivity['theta'], df_sensitivity['upper'], 'ro-', label='Upper bound')
plt.fill_between(df_sensitivity['theta'], df_sensitivity['lower'],
                 df_sensitivity['upper'], alpha=0.3, color='gray')
plt.xlabel('Theta (Information Constraint)')
plt.ylabel('ATE Bounds')
plt.title('Sensitivity to Information Constraint')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

Example 5: Bootstrap Confidence Intervals

Adding uncertainty quantification using bootstrap:

import numpy as np
from causalboundingengine.scenarios import BinaryConf
import pandas as pd

def bootstrap_bounds(X, Y, algorithm='manski', n_bootstrap=200, alpha=0.05):
    n = len(X)
    bootstrap_results = []

    for i in range(n_bootstrap):
        # Bootstrap sample
        indices = np.random.choice(n, n, replace=True)
        X_boot = X[indices]
        Y_boot = Y[indices]

        # Compute bounds
        scenario_boot = BinaryConf(X_boot, Y_boot)
        alg_func = getattr(scenario_boot.ATE, algorithm)
        bounds = alg_func()

        bootstrap_results.append({
            'iteration': i,
            'lower': bounds[0],
            'upper': bounds[1]
        })

    # Compute confidence intervals
    df_boot = pd.DataFrame(bootstrap_results)

    lower_ci = (
        np.percentile(df_boot['lower'], 100 * alpha/2),
        np.percentile(df_boot['lower'], 100 * (1 - alpha/2))
    )
    upper_ci = (
        np.percentile(df_boot['upper'], 100 * alpha/2),
        np.percentile(df_boot['upper'], 100 * (1 - alpha/2))
    )

    return {
        'bootstrap_samples': df_boot,
        'lower_bound_ci': lower_ci,
        'upper_bound_ci': upper_ci,
        'alpha': alpha
    }

# Generate data
np.random.seed(42)
X = np.random.binomial(1, 0.4, 100)
Y = np.random.binomial(1, 0.3 + 0.3 * X, 100)

# Bootstrap analysis
print("Computing bootstrap confidence intervals...")
boot_results = bootstrap_bounds(X, Y, algorithm='manski', n_bootstrap=100)

# Original bounds
scenario = BinaryConf(X, Y)
original_bounds = scenario.ATE.manski()

print("=== Bootstrap Results ===")
print(f"Original bounds: {original_bounds}")
print(f"Lower bound 95% CI: {boot_results['lower_bound_ci']}")
print(f"Upper bound 95% CI: {boot_results['upper_bound_ci']}")

Real-World Examples

Example 6: Medical Treatment Analysis

Analyzing treatment effectiveness with potential confounding:

import numpy as np
import pandas as pd
from causalboundingengine.scenarios import BinaryConf

# Simulate medical data
np.random.seed(42)
n = 800

# Patient characteristics (unmeasured severity)
severity = np.random.beta(2, 5, n)  # Most patients have low severity

# Treatment assignment (more severe patients more likely to receive treatment)
treatment_prob = 0.3 + 0.4 * (severity > 0.5)
X = np.random.binomial(1, treatment_prob, n)

# Recovery outcome (depends on treatment and severity)
recovery_prob = 0.4 + 0.3 * X - 0.2 * severity
recovery_prob = np.clip(recovery_prob, 0.05, 0.95)  # Keep probabilities valid
Y = np.random.binomial(1, recovery_prob, n)

# Create DataFrame for analysis
df = pd.DataFrame({
    'treatment': X,
    'recovery': Y,
    'severity': severity  # This would be unmeasured in practice
})

print("=== Medical Treatment Analysis ===")
print(f"Sample size: {n}")
print(f"Treatment rate: {np.mean(X):.3f}")
print(f"Recovery rate: {np.mean(Y):.3f}")
print(f"Recovery rate | Treated: {np.mean(Y[X==1]):.3f}")
print(f"Recovery rate | Control: {np.mean(Y[X==0]):.3f}")
print(f"Naive ATE estimate: {np.mean(Y[X==1]) - np.mean(Y[X==0]):.3f}")

# Causal bounds analysis (ignoring severity as it's unmeasured)
scenario = BinaryConf(X, Y)

print("\\n=== Causal Bounds (accounting for unmeasured confounding) ===")
print(f"Manski bounds:     {scenario.ATE.manski()}")
print(f"Autobound:         {scenario.ATE.autobound()}")

# True ATE (if we could observe severity)
true_ate = np.mean(recovery_prob * 1 - (recovery_prob - 0.3))  # Approximate
print(f"\\nApproximate true ATE: {true_ate:.3f}")

Example 7: Economic Policy Evaluation

Evaluating a job training program with instrumental variable:

import numpy as np
from causalboundingengine.scenarios import BinaryIV, BinaryConf

# Simulate job training program evaluation
np.random.seed(123)
n = 1000

# Random assignment to training eligibility (instrument)
eligible = np.random.binomial(1, 0.5, n)

# Individual motivation (unmeasured confounder)
motivation = np.random.beta(2, 3, n)

# Training participation (influenced by eligibility and motivation)
participation_prob = 0.2 + 0.5 * eligible + 0.3 * motivation
participation_prob = np.clip(participation_prob, 0.05, 0.95)
training = np.random.binomial(1, participation_prob, n)

# Employment outcome (influenced by training and motivation)
employment_prob = 0.4 + 0.25 * training + 0.2 * motivation
employment_prob = np.clip(employment_prob, 0.05, 0.95)
employed = np.random.binomial(1, employment_prob, n)

print("=== Job Training Program Evaluation ===")
print(f"Eligibility rate: {np.mean(eligible):.3f}")
print(f"Training participation rate: {np.mean(training):.3f}")
print(f"Employment rate: {np.mean(employed):.3f}")
print(f"Compliance rate: {np.mean(training[eligible==1]):.3f}")

# IV Analysis
scenario_iv = BinaryIV(training, employed, eligible)
print("\\n=== IV Bounds ===")
print(f"Autobound ATE: {scenario_iv.ATE.autobound()}")

# Compare with confounded analysis
scenario_conf = BinaryConf(training, employed)
print("\\n=== Confounded Analysis (no IV) ===")
print(f"Manski bounds: {scenario_conf.ATE.manski()}")
print("(IV bounds should be tighter if instrument is valid)")

Example 8: Large-Scale Comparison Study

Comprehensive analysis across multiple datasets:

import numpy as np
import pandas as pd
from causalboundingengine.scenarios import BinaryConf
import time

def generate_dataset(n, confounding_strength=0.5, seed=None):
    if seed is not None:
        np.random.seed(seed)

    U = np.random.binomial(1, 0.5, n)
    X_prob = np.clip(0.3 + confounding_strength * U, 0, 1)
    X = np.random.binomial(1, X_prob, n)
    Y_prob = np.clip(0.2 + 0.3 * X + confounding_strength * U, 0, 1)
    Y = np.random.binomial(1, Y_prob, n)

    return X, Y

def analyze_dataset(X, Y, dataset_id):
    scenario = BinaryConf(X, Y)
    algorithms = ['manski', 'autobound']

    results = []
    for alg_name in algorithms:
        start_time = time.time()
        try:
            alg_func = getattr(scenario.ATE, alg_name)
            bounds = alg_func()
            end_time = time.time()

            results.append({
                'dataset_id': dataset_id,
                'algorithm': alg_name,
                'lower_bound': bounds[0],
                'upper_bound': bounds[1],
                'width': bounds[1] - bounds[0],
                'computation_time': end_time - start_time,
                'success': True
            })
        except Exception as e:
            results.append({
                'dataset_id': dataset_id,
                'algorithm': alg_name,
                'lower_bound': None,
                'upper_bound': None,
                'width': None,
                'computation_time': None,
                'success': False
            })

    return results

# Run comparison study
print("=== Large-Scale Comparison Study ===")

# Generate multiple datasets
datasets = []
dataset_configs = [
    {'n': 100, 'confounding': 0.2, 'name': 'Small, Weak confounding'},
    {'n': 100, 'confounding': 0.8, 'name': 'Small, Strong confounding'},
    {'n': 1000, 'confounding': 0.2, 'name': 'Large, Weak confounding'},
    {'n': 1000, 'confounding': 0.8, 'name': 'Large, Strong confounding'},
]

all_results = []
for i, config in enumerate(dataset_configs):
    print(f"Analyzing dataset {i+1}: {config['name']}")

    X, Y = generate_dataset(
        n=config['n'],
        confounding_strength=config['confounding'],
        seed=42 + i
    )

    dataset_results = analyze_dataset(X, Y, i+1)

    # Add dataset metadata
    for result in dataset_results:
        result.update({
            'sample_size': config['n'],
            'confounding_strength': config['confounding'],
            'dataset_name': config['name']
        })

    all_results.extend(dataset_results)

# Compile results
df_results = pd.DataFrame(all_results)

# Summary statistics
print("\\n=== Summary Results ===")
summary = df_results[df_results['success']].groupby(['algorithm', 'confounding_strength']).agg({
    'width': ['mean', 'std'],
    'computation_time': ['mean', 'std']
}).round(4)

print(summary)

# Best performing algorithm by scenario
print("\\n=== Best Algorithm by Scenario (narrowest bounds) ===")
best_by_scenario = df_results[df_results['success']].loc[
    df_results[df_results['success']].groupby(['dataset_id'])['width'].idxmin()
][['dataset_name', 'algorithm', 'width']]

print(best_by_scenario.to_string(index=False))

Specialized Use Cases

Example 9: Custom Algorithm Integration

Using a custom algorithm with the framework:

import numpy as np
from causalboundingengine.algorithms.algorithm import Algorithm
from causalboundingengine.scenarios import BinaryConf

class ConservativeBounds(Algorithm):

    def _compute_ATE(self, X: np.ndarray, Y: np.ndarray,
                    conservatism: float = 0.8, **kwargs) -> tuple[float, float]:

        # Basic observed difference
        p1 = np.mean(Y[X == 1]) if np.any(X == 1) else 0.5
        p0 = np.mean(Y[X == 0]) if np.any(X == 0) else 0.5
        observed_diff = p1 - p0

        # Add conservative margin based on parameter
        margin = conservatism * (1 - abs(observed_diff))

        lower = observed_diff - margin
        upper = observed_diff + margin

        # Ensure bounds are valid
        lower = max(lower, -1.0)
        upper = min(upper, 1.0)

        return float(lower), float(upper)

# Create custom scenario with new algorithm
class CustomBinaryConf(BinaryConf):
    AVAILABLE_ALGORITHMS = {
        **BinaryConf.AVAILABLE_ALGORITHMS,
        'ATE': {
            **BinaryConf.AVAILABLE_ALGORITHMS['ATE'],
            'conservative': ConservativeBounds,
        }
    }

# Use custom scenario
X = np.array([0, 1, 1, 0, 1])
Y = np.array([1, 0, 1, 0, 1])
scenario = CustomBinaryConf(X, Y)

print("=== Custom Algorithm Example ===")
print(f"Standard Manski: {scenario.ATE.manski()}")
print(f"Conservative (0.8): {scenario.ATE.conservative(conservatism=0.8)}")
print(f"Conservative (0.3): {scenario.ATE.conservative(conservatism=0.3)}")

Example 10: Handling External Dependencies Gracefully

Robust code that handles missing R/Java dependencies:

import numpy as np
from causalboundingengine.scenarios import BinaryConf, BinaryIV

def robust_analysis(X, Y, Z=None, prefer_external=True):

    if Z is None:
        scenario = BinaryConf(X, Y)
        available_algorithms = scenario.get_algorithms('ATE')
    else:
        scenario = BinaryIV(X, Y, Z)
        available_algorithms = scenario.get_algorithms('ATE')

    results = {}

    # Priority order: external algorithms first if preferred
    if prefer_external:
        algorithm_priority = ['causaloptim', 'zaffalonbounds', 'autobound', 'manski']
    else:
        algorithm_priority = ['manski', 'autobound', 'causaloptim', 'zaffalonbounds']

    for alg_name in algorithm_priority:
        if alg_name in available_algorithms:
            try:
                alg_func = getattr(scenario.ATE, alg_name)
                bounds = alg_func()
                results[alg_name] = {
                    'bounds': bounds,
                    'status': 'success',
                    'error': None
                }
                print(f"✓ {alg_name}: {bounds}")
            except ImportError as e:
                results[alg_name] = {
                    'bounds': None,
                    'status': 'dependency_missing',
                    'error': str(e)
                }
                print(f"✗ {alg_name}: Missing dependency - {e}")
            except Exception as e:
                results[alg_name] = {
                    'bounds': None,
                    'status': 'failed',
                    'error': str(e)
                }
                print(f"✗ {alg_name}: Failed - {e}")

    return results

# Test with confounded data
np.random.seed(42)
X = np.random.binomial(1, 0.4, 100)
Y = np.random.binomial(1, 0.3 + 0.2 * X, 100)

print("=== Robust Analysis Example ===")
print("Confounded scenario:")
results_conf = robust_analysis(X, Y, prefer_external=True)

# Test with IV data
Z = np.random.binomial(1, 0.5, 100)
print("\\nIV scenario:")
results_iv = robust_analysis(X, Y, Z, prefer_external=True)

# Summary
successful_algorithms = [alg for alg, result in results_conf.items()
                        if result['status'] == 'success']
print(f"\\nSuccessful algorithms: {successful_algorithms}")

These examples demonstrate the flexibility and power of CausalBoundingEngine across various scenarios, from basic usage to advanced applications.