Today we need a new virtual environment; Python 3.10 is highly recommended (use pyenv if needed).

```
python3.10 -m venv env_adv
source env_adv/bin/activate
pip install --upgrade pip
pip install numpy==1.26 fairlearn==0.9.0 plotly==5.24.1 nbformat==5.10.4 aif360['AdversarialDebiasing']==0.6.1 aif360['inFairness']==0.6.1 ipykernel==6.29.5 BlackBoxAuditing==0.1.54 cvxpy==1.6.0 dice-ml==0.11 lime==0.2.0.1
cd env_adv/lib/python3.10/site-packages/aif360/data/raw/meps
Rscript generate_data.R
```

2 new packages in comparison to TD4
`pip install dice-ml==0.11 lime==0.2.0.1`

# TD 5: Audit de modèles



In this TD the aim is to analyse the decision made by a model.
You will use 3 different methods:
- feature importances with LIME
- black box auditing that considers the features in pairs
- counterfactual examples with dice-ml

## Import and load the dataset

In [None]:
# imports
import numpy as np
import pandas as pd
import plotly.express as px
import warnings
import matplotlib.pyplot as plt


# Ignore every FutureWarning raised from here on.
warnings.simplefilter(action="ignore", category=FutureWarning)
# Ignore UserWarning as well; append=True puts this filter at the end of the
# warnings filter list instead of the front.
warnings.simplefilter(action="ignore", append=True, category=UserWarning)
# Datasets
from aif360.datasets import MEPSDataset19

# Fairness metrics
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the MEPS panel 19 dataset once; the same object is reused further down
# when the data is converted to a dataframe.
MEPSDataset19_data = MEPSDataset19()
# Shuffled split at the 50% and 80% marks: train (first 50%), validation
# (next 30%) and test (last 20%). The already-loaded dataset is split instead
# of constructing (and parsing) a second MEPSDataset19 instance.
(dataset_orig_panel19_train, dataset_orig_panel19_val, dataset_orig_panel19_test) = (
    MEPSDataset19_data.split([0.5, 0.8], shuffle=True)
)

In [None]:
# Number of instances in the train, validation and test splits (in that order).
tuple(
    len(split.instance_weights)
    for split in (
        dataset_orig_panel19_train,
        dataset_orig_panel19_val,
        dataset_orig_panel19_test,
    )
)

In [None]:
from aif360.sklearn.metrics import *
from sklearn.metrics import  balanced_accuracy_score

 
def get_metrics(
    y_true,  # list or np.array of truth values
    y_pred=None,  # list or np.array of predictions
    prot_attr=None,  # list or np.array of protected/sensitive attribute values
    priv_group=1,  # value taken by the privileged group
    pos_label=1,  # value taken by the positive truth/prediction
    sample_weight=None,  # list or np.array of weights value
):
    """Compute a dictionary of fairness and performance metrics.

    The inputs are plain lists (or arrays). ``base_rate_truth``,
    ``statistical_parity_difference`` and ``disparate_impact_ratio`` are
    always computed. When ``y_pred`` is given, the prediction-based metrics
    are added as well; ``conditional_demographic_disparity`` is set to
    ``None`` when the predictions contain a single distinct value.

    Returns:
        dict mapping each metric name to its value.
    """
    # Keyword arguments shared by the metric calls below.
    label_kwargs = {"pos_label": pos_label, "sample_weight": sample_weight}
    attr_kwargs = {
        "y_true": y_true,
        "y_pred": y_pred,
        "prot_attr": prot_attr,
        **label_kwargs,
    }
    group_kwargs = {**attr_kwargs, "priv_group": priv_group}

    group_metrics = {
        "base_rate_truth": base_rate(y_true=y_true, **label_kwargs),
        "statistical_parity_difference": statistical_parity_difference(
            **group_kwargs
        ),
        "disparate_impact_ratio": disparate_impact_ratio(**group_kwargs),
    }
    if y_pred is None:
        return group_metrics

    # Base rate of the predictions: the predictions play the role of y_true.
    group_metrics["base_rate_preds"] = base_rate(y_true=y_pred, **label_kwargs)
    for metric in (equal_opportunity_difference, average_odds_difference):
        group_metrics[metric.__name__] = metric(**group_kwargs)

    # Only evaluated when the predictions take more than one distinct value.
    if len(set(y_pred)) > 1:
        cdd = conditional_demographic_disparity(**attr_kwargs)
    else:
        cdd = None
    group_metrics["conditional_demographic_disparity"] = cdd

    for metric in (smoothed_edf, df_bias_amplification):
        group_metrics[metric.__name__] = metric(**attr_kwargs)

    group_metrics["balanced_accuracy_score"] = balanced_accuracy_score(
        y_true=y_true, y_pred=y_pred, sample_weight=sample_weight
    )
    return group_metrics

## Utilisation de LIME
### Question 1.1 - apprendre une regression logistique qui prédit l'UTILIZATION (comme dans le TD3)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline



### Question 1.2 (optionnelle) - Observer l'impact du threshold sur les performances de la régression logistique (balanced accuracy et disparate impact)

### Question 1.3 : apprendre un LimeEncoder (nommer l'objet lime_data) sur le dataset AIF360 de train, puis transformer avec ce LimeEncoder le dataset de train et celui de test en s_train et s_test

In [None]:
from aif360.datasets.lime_encoder import LimeEncoder

In [None]:

# Exercise placeholder (Question 1.3): replace "TOFILL" with the AIF360
# training dataset before running this cell.
lime_data = LimeEncoder().fit("TOFILL")

In [None]:
# Exercise placeholders (Question 1.3): replace each "TOFILL" with the AIF360
# train and test dataset respectively.
s_train = lime_data.transform("TOFILL")
s_test = lime_data.transform("TOFILL")

### Question 1.4 use LimeTabularExplainer to explain the decision made on several instances of the test dataset.

The code is already written, you just need to 'play' with it: it visualises the feature importances for the different predictions.

In [None]:
from lime.lime_tabular import LimeTabularExplainer

In [None]:
# LIME explainer built on the encoded training data; the class, feature and
# category names all come from the fitted LimeEncoder (lime_data).
explainer = LimeTabularExplainer(
    s_train,
    feature_names=lime_data.s_feature_names,
    categorical_features=lime_data.s_categorical_features,
    categorical_names=lime_data.s_categorical_names,
    class_names=lime_data.s_class_names,
    kernel_width=3,
    verbose=False,
    discretize_continuous=True,
)

In [None]:
def s_predict_fn(x):
    """Return the class probabilities predicted by ``model`` for ``x``.

    ``x`` holds LIME-encoded samples; they are mapped back with
    ``lime_data.inverse_transform`` before being passed to
    ``model.predict_proba``.
    """
    decoded = lime_data.inverse_transform(x)
    return model.predict_proba(decoded)

In [None]:
def show_explanation(ind):
    """Explain the prediction for test instance ``ind`` and plot the result.

    Prints the true label of the instance, then shows the LIME explanation
    (10 features) as a matplotlib figure.
    """
    explanation = explainer.explain_instance(
        s_test[ind],
        s_predict_fn,
        num_features=10,
    )
    actual_label = dataset_orig_panel19_test.labels[ind]
    print(f"Actual label: {actual_label!s}")
    explanation.as_pyplot_figure()
    plt.show()

### Question 1.5: Redo the analysis with a logistic regression trained on the reweighed dataset

## Question2 Utilisation de BlackBoxAuditing

Attention : cette fois, nous nous intéressons aux influences indirectes ; cette méthode considère les features par couple.

Aussi transformer les attributs catégoriels en "one hot encoding", n'est cette fois pas une bonne approche car ces colonnes seront par construction très liées entre elles.

Nous allons donc utiliser un ordinal encoding, puis uniquement les classifieurs de sklearn compatibles avec les attributs catégoriels (HistGradientBoostingClassifier).

Il faut dans un premier temps transformer le dataset AIF en dataframe et regrouper les colonnes qui ont déjà été one-hot encodées (tout cela a déjà été fait dans le TD3), puis appliquer un ordinal encoding aux colonnes catégorielles.

### Question 2.1 preprocesser la donnée

Afin que vous puissiez passer plus de temps à manipuler les explications, nous vous fournissons le code pour bien formatter le dataframe
vous pouvez passer à la 2.2

In [None]:
from sklearn import preprocessing


def get_df(MepsDataset):
    """Convert an AIF360 MEPS dataset into a label-encoded dataframe.

    The one-hot encoded columns (named ``"<feature>=<value>"``) are merged
    back into a single column per feature, then every such feature is
    encoded with its own ``LabelEncoder``. The instance weights are stored
    in a ``"WEIGHT"`` column. The categorical feature names and the integer
    to category mapping of each encoder are printed along the way.

    Returns:
        tuple ``(df, encoders)`` where ``encoders`` maps each categorical
        column name to its fitted ``LabelEncoder``.
    """
    # convert_to_dataframe returns the dataframe together with a dictionary
    # holding the extra information (weights, sensitive attributes, ...).
    df, attributes = MepsDataset.convert_to_dataframe()
    df["WEIGHT"] = attributes["instance_weights"]

    # Map each categorical column name to the list of its one-hot encoded
    # columns (specific to the MEPS dataset naming scheme).
    categorical_columns_dic = {}
    for column in df.columns:
        name, *values = column.split("=")
        if values:
            categorical_columns_dic.setdefault(name, []).append(column)
    categorical_features = categorical_columns_dic.keys()
    print(categorical_features)

    def categorical_transform(df, onehotencoded, cat_col):
        # A single one-hot column is kept as it is.
        if len(onehotencoded) <= 1:
            return df[onehotencoded]
        # Otherwise pick, per row, the column holding the maximum and keep
        # only the part of its name that follows "<cat_col>=".
        prefix_len = len(cat_col) + 1
        return df[onehotencoded].apply(
            lambda row: onehotencoded[np.argmax(row)][prefix_len:], axis=1
        )

    # Undo the one-hot encoding, one categorical feature at a time.
    for cat_col, onehotencoded in categorical_columns_dic.items():
        df[cat_col] = categorical_transform(df, onehotencoded, cat_col)
        df.drop(columns=onehotencoded, inplace=True)

    encoders = {}
    for cat_col in categorical_features:
        encoder = preprocessing.LabelEncoder()
        encoders[cat_col] = encoder
        df[cat_col] = encoder.fit_transform(df[cat_col])
        print(cat_col)
        # Show which original category each integer code stands for.
        for idx in sorted(df[cat_col].unique()):
            print(idx, encoder.inverse_transform([idx]))
    return df, encoders


# Build the label-encoded dataframe (and its encoders) from the full dataset.
df, encoders = get_df(MEPSDataset19_data)

In [None]:
# Display the transformed dataframe.
df

### Question 2.2  Separation train/test du dataframe transformé pour BlackBoxAudit

In [None]:
from sklearn import ensemble
# Boolean mask flagging the columns of df_X that have a label encoder,
# i.e. the categorical features.
cat_mask = np.array([column in encoders for column in df_X.columns])
clf = ensemble.HistGradientBoostingClassifier(
    categorical_features=cat_mask,
    random_state=42,
)
clf.fit(X_train, y_train)

preds = clf.predict(X_test)

# Score on the test split, then on the training split.
clf.score(X_test, y_test), clf.score(X_train, y_train)

### Question 2.4 utiliser la librairie BlackBoxAuditing pour "auditer" le modèle par l'analyse des influences indirectes de l'âge (le calcul prend du temps mais n'hésitez pas à auditer d'autres attributs)

Le code est de nouveau fourni, vous avez juste à l'adapter à vos notations

Voici la documentation de la librairie utilisée
https://github.com/algofairness/BlackBoxAuditing/tree/master

In [None]:
import pickle

# Save your data and model (named clf here) on disk.
# Each split is written as the features plus the label in a "Y" column.
data_test = X_test.copy(deep=True)
data_test["Y"] = y_test
data_train = X_train.copy(deep=True)
data_train["Y"] = y_train

data_test.to_csv("TD5_test_data.csv", index=False)
data_train.to_csv("TD5_train_data.csv", index=False)

with open("TD5_clf.pickle", "wb") as model_file:
    pickle.dump(clf, model_file)

In [None]:
from BlackBoxAuditing.data import load_from_file
from BlackBoxAuditing.model_factories.AbstractModelFactory import AbstractModelFactory
from BlackBoxAuditing.model_factories.AbstractModelVisitor import AbstractModelVisitor

import BlackBoxAuditing as BBA


def _bba_column_types(frame):
    """Return ``int`` or ``float`` for each column of ``frame``, in order.

    Integer columns are detected with ``pd.api.types.is_integer_dtype``
    rather than by comparing the dtype to the string ``"int"``: that string
    resolves to the platform C long, so int64 columns would not match on
    platforms where C long is 32 bits.
    """
    return [
        int if pd.api.types.is_integer_dtype(col_type) else float
        for col_type in frame.dtypes
    ]


# Load the training CSV entirely into the train part (train_percentage=1.0).
(_, train_BBA, _, _, _, _) = load_from_file(
    "TD5_train_data.csv",
    correct_types=_bba_column_types(data_train),
    response_header="Y",
    train_percentage=1.0,
)
# Load the test CSV entirely into the test part (train_percentage=0.0) and
# keep the metadata returned alongside it.
(headers, _, test_BBA, response_header, features_to_ignore, correct_types) = load_from_file(
    "TD5_test_data.csv",
    correct_types=_bba_column_types(data_test),
    response_header="Y",
    train_percentage=0.0,
)
# Tuple in the layout returned by load_from_file, mixing both loads.
BBA_data = (headers, train_BBA, test_BBA, response_header, features_to_ignore, correct_types)

In [None]:
class HirePredictorBuilder(AbstractModelFactory):
    """Model factory handed to the auditor; it always builds a HirePredictor."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.verbose_factory_name = "HirePredictor"

    def build(self, train_set):
        """Return a new HirePredictor; ``train_set`` is not used."""
        return HirePredictor()

class HirePredictor(AbstractModelVisitor):
    """Model visitor that wraps the classifier saved in ``TD5_clf.pickle``."""

    def __init__(self):
        # NOTE: unpickling can execute arbitrary code. Only load the file
        # written by this notebook, never a pickle from an untrusted source.
        with open("TD5_clf.pickle", "rb") as f:
            self.clf = pickle.load(f)

    def test(self, test_set, test_name=""):
        """Return ``[actual, prediction]`` for every row of ``test_set``.

        The last value of each row is taken as the actual label and all the
        other values as the features. Each prediction is a one-element
        array, exactly as when ``predict`` is called on a single row.
        ``test_name`` is not used.
        """
        rows = list(test_set)
        if not rows:
            # predict() cannot be called on an empty batch.
            return []
        # One predict call for the whole set instead of one call per row.
        features = np.array([row[:-1] for row in rows])
        predictions = self.clf.predict(features)
        return [[row[-1], predictions[i : i + 1]] for i, row in enumerate(rows)]


In [None]:
# Columns whose influence on the model is audited.
features_to_audit = ["AGE", "SEX", "RACE", "REGION"]

In [None]:
def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments, does nothing."""
    pass
import warnings
# Monkeypatch: calls made through warnings.warn(...) after this line hit the
# no-op above. This replaces the function for the whole session, not only for
# the audit below.
warnings.warn = warn

# Configure the auditor with the model factory defined above and run it on
# the BBA data for the selected features.
# NOTE(review): results are presumably written under "audit-output" - confirm
# against the BlackBoxAuditing documentation.
auditor = BBA.Auditor()
auditor.ModelFactory = HirePredictorBuilder
auditor(BBA_data, output_dir = "audit-output", features_to_audit=features_to_audit)

### Question 2.5: If you are curious redo the auditing of a model with bias mitigation approach (for example Reweighing)

## Question 3: Generer des exemples contrefactuels en utilisant dice-ml

Voici la documentation de la librairie utilisée
https://github.com/interpretml/DiCE?tab=readme-ov-file


In [None]:
import dice_ml
from dice_ml.utils import helpers

In [None]:
# Provide the trained ML model to DiCE's model object.
# The model is the HistGradientBoostingClassifier (clf) trained for the
# BlackBoxAuditing question.
backend = 'sklearn'
m = dice_ml.Model(model=clf, backend=backend)

### Question 3.1 : Create a list with all continuous features

### Question 3.2 create a dice_ml Data with the dataframe.

### Question 3.3  use dice to create counterfactual example using the 'random' method

### Question 3.4 Redo the counterfactuals creation using only data statistics not the data itself

In [None]:
# Data privacy: describe the data with statistics only, not the data itself.
# Continuous features are summarised by their [min, max] range, every other
# feature by the sorted list of its distinct values; the outcome column "Y"
# is left out.
features = {}
for column in data_train.columns:
    if column in continuous_features:
        summary = [data_train[column].min(), data_train[column].max()]
    elif column == "Y":
        continue
    else:
        summary = data_train[column].unique().tolist()
    features[column] = sorted(summary)

In [None]:
# Display the per-feature statistics built in the previous cell.
features

In [None]:
# DiCE data object built from the feature statistics only; "Y" is the outcome.
d = dice_ml.Data(
    features=features,
    outcome_name="Y",
)

### Question 3.5: If you are curious, redo the counterfactual example creation with a model trained with a bias mitigation approach (for example Reweighing)