# Source code for causalboundingengine.algorithms.autobound

import numpy as np
import pandas as pd
from causalboundingengine.algorithms.algorithm import Algorithm

from causalboundingengine.algorithms.autobound_pkg.autobound.causalProblem import causalProblem
from causalboundingengine.algorithms.autobound_pkg.autobound.DAG import DAG
import warnings


class Autobound(Algorithm):
    """Bound the ATE and the PNS by delegating to the bundled AutoBound code.

    The observed binary-style data (``X``, ``Y`` and optionally an
    instrument ``Z``) is summarised as an empirical joint distribution,
    which is loaded into an AutoBound ``causalProblem`` together with a
    fixed causal graph. The resulting program is solved for a lower and an
    upper bound on the requested estimand.
    """

    # This module integrates code from the AutoBound project,
    # developed by Guilherme Duarte, Dean Knox, Jonathan Mummolo, and Ilya Shpitser.
    #
    # The original implementation is available at:
    # https://www.tandfonline.com/doi/full/10.1080/01621459.2023.2216909
    #
    # The code is distributed under the MIT License.
    # Minor adjustments were made to ensure compatibility within the CausalBoundingEngine.

    def _compute_ATE(self, X: np.ndarray, Y: np.ndarray, Z: np.ndarray = None, **kwargs) -> tuple[float, float]:
        """Return ``(lower, upper)`` bounds on the average treatment effect.

        Parameters:
            X: Treatment observations.
            Y: Outcome observations.
            Z: Optional instrument observations. When given, the graph
                additionally contains the edge ``Z -> X``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            tuple: (lower_bound, upper_bound) as produced by ``run_experiment``.
        """
        return Autobound._bound_query('ATE', X, Y, Z)

    def _compute_PNS(self, X: np.ndarray, Y: np.ndarray, Z: np.ndarray = None, **kwargs) -> tuple[float, float]:
        """Return ``(lower, upper)`` bounds on the probability of necessity and sufficiency.

        Parameters:
            X: Treatment observations.
            Y: Outcome observations.
            Z: Optional instrument observations. When given, the graph
                additionally contains the edge ``Z -> X``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            tuple: (lower_bound, upper_bound) as produced by ``run_experiment``.
        """
        return Autobound._bound_query('PNS', X, Y, Z)

    @staticmethod
    def _bound_query(query, X, Y, Z=None):
        """Shared driver for ``_compute_ATE`` and ``_compute_PNS``.

        Builds the data frame and picks the causal graph that matches the
        available variables, then hands both to ``run_experiment``.

        Parameters:
            query (str): Estimand label forwarded to ``run_experiment``
                ('ATE' or 'PNS').
            X: Treatment observations.
            Y: Outcome observations.
            Z: Optional instrument observations.

        Returns:
            tuple: (lower_bound, upper_bound) from ``run_experiment``.
        """
        if Z is not None:
            # Instrumental-variable setting: Z influences X only, and U
            # confounds X and Y.
            columns = {'Y': Y, 'X': X, 'Z': Z}
            dagstring = "Z -> X, X -> Y, U -> X, U -> Y"
        else:
            # No instrument: X -> Y with confounder U.
            columns = {'Y': Y, 'X': X}
            dagstring = "X -> Y, U -> X, U -> Y"

        df = pd.DataFrame(columns)
        joint_probs = Autobound._compute_joint_probabilities_IV(df)
        lower, upper = Autobound.run_experiment(
            query, dagstring=dagstring, unob="U", joint_probs=joint_probs
        )
        return lower, upper

    @staticmethod
    def run_experiment(query, dagstring, unob, joint_probs):
        """
        Run the AutoBound experiment.

        Parameters:
            query (str): Estimand to bound. 'ATE' bounds the effect of X on
                Y via ``set_ate``; 'PNS' bounds the counterfactual query
                ``Y(X=1)=1 & Y(X=0)=0``.
            dagstring (str): Edge list describing the causal graph, in the
                format accepted by ``DAG.from_structure``
                (e.g. "X -> Y, U -> X, U -> Y").
            unob (str): Passed as the second argument of
                ``DAG.from_structure``; callers in this class pass "U",
                presumably the unobserved node name.
            joint_probs (pd.DataFrame): Joint distribution of the observed
                variables, with one column per variable plus a 'prob'
                column, as built by ``_compute_joint_probabilities_IV``.

        Returns:
            tuple: (lower_bound, upper_bound) from AutoBound

        Raises:
            ValueError: If ``query`` is neither 'ATE' nor 'PNS'.
        """
        dag = DAG()
        dag.from_structure(dagstring, unob)

        problem = causalProblem(dag)
        problem.load_data_pandas(joint_probs)
        problem.add_prob_constraints()

        if query == 'ATE':
            problem.set_ate(ind='X', dep='Y')
        elif query == 'PNS':
            pns_query = problem.query('Y(X=1)=1 & Y(X=0)=0')
            problem.set_estimand(pns_query)
        else:
            raise ValueError("Query must be either 'ATE' or 'PNS'.")

        program = problem.write_program()
        lb, ub = program.run_pyomo(solver_name='highs', verbose=False)
        return lb, ub

    @staticmethod
    def _compute_joint_probabilities_IV(df):
        """
        Computes the joint probabilities for each combination of Z, X, and Y
        in the input DataFrame. If Z is not present, computes joint
        probabilities for X and Y only.

        Each probability is the number of rows showing that combination
        divided by the total number of rows in ``df``.

        Parameters:
            df (pd.DataFrame): Input DataFrame with columns ['X', 'Y'] or
                ['X', 'Y', 'Z'].

        Returns:
            pd.DataFrame: DataFrame with columns ['Z', 'X', 'Y', 'prob'] or
                ['X', 'Y', 'prob'].
        """
        # The two cases differ only in which columns define a cell.
        group_cols = ['Z', 'X', 'Y'] if 'Z' in df.columns else ['X', 'Y']

        joint_counts = df.groupby(group_cols).size().reset_index(name='count')
        total_count = len(df)
        joint_counts['prob'] = joint_counts['count'] / total_count
        return joint_counts.drop(columns=['count'])