Classification
In [1]:
Copied!
import os
import sys

import pandas as pd

# Resolve the directory two levels above the notebook's working directory and
# make it importable. The membership check keeps sys.path free of duplicate
# entries when this cell is re-run in the same kernel.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if project_root not in sys.path:
    sys.path.append(project_root)

data_dir = os.path.join(project_root, 'data')
data_file_path = os.path.join(data_dir, 'RADCURE_processed_clinical.csv')

# Read the clinical table (first CSV column becomes the index) and remove the
# two columns that are excluded from the analysis below. drop() returns a new
# frame here instead of mutating in place, so `df` is built in one expression.
df = pd.read_csv(data_file_path, index_col=0).drop(
    columns=["Study ID", "survival_time"]
)
import os
import sys

import pandas as pd

# Resolve the directory two levels above the notebook's working directory and
# make it importable. The membership check keeps sys.path free of duplicate
# entries when this cell is re-run in the same kernel.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if project_root not in sys.path:
    sys.path.append(project_root)

data_dir = os.path.join(project_root, 'data')
data_file_path = os.path.join(data_dir, 'RADCURE_processed_clinical.csv')

# Read the clinical table (first CSV column becomes the index) and remove the
# two columns that are excluded from the analysis below. drop() returns a new
# frame here instead of mutating in place, so `df` is built in one expression.
df = pd.read_csv(data_file_path, index_col=0).drop(
    columns=["Study ID", "survival_time"]
)
In [3]:
Copied!
from jarvais.analyzer import Analyzer
# NOTE: this rebinds `print` to rich's version for the rest of the notebook.
from rich import print

# Column roles handed to the Analyzer. "death" is listed as categorical and is
# also the target variable.
categorical_columns = [
    "Sex",
    "T Stage",
    "N Stage",
    "Stage",
    "Smoking Status",
    "Disease Site",
    "death",
    "HPV Combined",
    "Chemotherapy",
]
continuous_columns = ["age at dx", "Dose"]

analyzer = Analyzer(
    data=df,
    output_dir='./outputs/analyzer',
    categorical_columns=categorical_columns,
    continuous_columns=continuous_columns,
    target_variable='death',
    task='classification',
)

# Show the resolved settings, then execute the analysis.
print(analyzer)
analyzer.run()
from jarvais.analyzer import Analyzer
# NOTE: this rebinds `print` to rich's version for the rest of the notebook.
from rich import print

# Column roles handed to the Analyzer. "death" is listed as categorical and is
# also the target variable.
categorical_columns = [
    "Sex",
    "T Stage",
    "N Stage",
    "Stage",
    "Smoking Status",
    "Disease Site",
    "death",
    "HPV Combined",
    "Chemotherapy",
]
continuous_columns = ["age at dx", "Dose"]

analyzer = Analyzer(
    data=df,
    output_dir='./outputs/analyzer',
    categorical_columns=categorical_columns,
    continuous_columns=continuous_columns,
    target_variable='death',
    task='classification',
)

# Show the resolved settings, then execute the analysis.
print(analyzer)
analyzer.run()
15:28:54 [warning ] Date columns not specified. Inferring from remaining columns. [jarvais] call=analyzer.__init__:85
Analyzer( AnalyzerSettings( output_dir=PosixPath('outputs/analyzer'), categorical_columns=[ 'Sex', 'T Stage', 'N Stage', 'Stage', 'Smoking Status', 'Disease Site', 'death', 'HPV Combined', 'Chemotherapy' ], continuous_columns=['age at dx', 'Dose'], date_columns=[], task='classification', target_variable='death', generate_report=True, settings_path=None, settings_schema_path=None, missingness=MissingnessModule( enabled=True, categorical_strategy={ 'Sex': 'unknown', 'T Stage': 'unknown', 'N Stage': 'unknown', 'Stage': 'unknown', 'Smoking Status': 'unknown', 'Disease Site': 'unknown', 'death': 'unknown', 'HPV Combined': 'unknown', 'Chemotherapy': 'unknown' }, continuous_strategy={'age at dx': 'median', 'Dose': 'median'} ), outlier=OutlierModule( enabled=True, categorical_strategy={ 'Sex': 'frequency', 'T Stage': 'frequency', 'N Stage': 'frequency', 'Stage': 'frequency', 'Smoking Status': 'frequency', 'Disease Site': 'frequency', 'death': 'frequency', 'HPV Combined': 'frequency', 'Chemotherapy': 'frequency' }, continuous_strategy={'age at dx': 'none', 'Dose': 'none'}, threshold=0.01, categorical_mapping={}, group_outliers=True ), visualization=DataVisualizationModule( enabled=True, plots=['corr', 'pairplot', 'umap', 'frequency_table', 'multiplot'], save_to_json=False ), boolean=BooleanEncodingModule(enabled=True, columns=[]), dashboard=DashboardModule(enabled=True, n_top=10, significance_threshold=0.05) ) )
15:28:55 [info ] Performing missingness analysis... [jarvais] call=missingness.__call__:40 [info ] Performing outlier analysis... [jarvais] call=outlier.__call__:63 [info ] Plotting Correlation Matrix... [jarvais] call=visualization.__call__:122
+-----------------------+-------------------+-----------+-------------+ | | | Missing | Overall | +=======================+===================+===========+=============+ | n | | | 3346 | +-----------------------+-------------------+-----------+-------------+ | age at dx, mean (SD) | | 0 | 62.3 (11.6) | +-----------------------+-------------------+-----------+-------------+ | Dose, mean (SD) | | 0 | 66.7 (5.8) | +-----------------------+-------------------+-----------+-------------+ | Sex, n (%) | Female | | 686 (20.5) | +-----------------------+-------------------+-----------+-------------+ | | Male | | 2660 (79.5) | +-----------------------+-------------------+-----------+-------------+ | T Stage, n (%) | None | | 12 (0.4) | +-----------------------+-------------------+-----------+-------------+ | | T0 | | 167 (5.0) | +-----------------------+-------------------+-----------+-------------+ | | T1 | | 454 (13.6) | +-----------------------+-------------------+-----------+-------------+ | | T1 (2) | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | T1a | | 179 (5.3) | +-----------------------+-------------------+-----------+-------------+ | | T1b | | 88 (2.6) | +-----------------------+-------------------+-----------+-------------+ | | T2 | | 927 (27.7) | +-----------------------+-------------------+-----------+-------------+ | | T2 (2) | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | T2a | | 4 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | T2b | | 5 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | T3 | | 861 (25.7) | +-----------------------+-------------------+-----------+-------------+ | | T3 (2) | | 3 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | T4 | | 116 (3.5) | +-----------------------+-------------------+-----------+-------------+ | | T4a | | 358 (10.7) | 
+-----------------------+-------------------+-----------+-------------+ | | T4b | | 121 (3.6) | +-----------------------+-------------------+-----------+-------------+ | | TX | | 4 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | Tis | | 44 (1.3) | +-----------------------+-------------------+-----------+-------------+ | | rT0 | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | N Stage, n (%) | N0 | | 1147 (34.3) | +-----------------------+-------------------+-----------+-------------+ | | N1 | | 344 (10.3) | +-----------------------+-------------------+-----------+-------------+ | | N2 | | 182 (5.4) | +-----------------------+-------------------+-----------+-------------+ | | N2a | | 125 (3.7) | +-----------------------+-------------------+-----------+-------------+ | | N2b | | 791 (23.6) | +-----------------------+-------------------+-----------+-------------+ | | N2c | | 532 (15.9) | +-----------------------+-------------------+-----------+-------------+ | | N3 | | 170 (5.1) | +-----------------------+-------------------+-----------+-------------+ | | N3a | | 13 (0.4) | +-----------------------+-------------------+-----------+-------------+ | | N3b | | 28 (0.8) | +-----------------------+-------------------+-----------+-------------+ | | NX | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | None | | 13 (0.4) | +-----------------------+-------------------+-----------+-------------+ | Stage, n (%) | 0 | | 44 (1.3) | +-----------------------+-------------------+-----------+-------------+ | | I | | 352 (10.5) | +-----------------------+-------------------+-----------+-------------+ | | IB | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | II | | 400 (12.0) | +-----------------------+-------------------+-----------+-------------+ | | IIA | | 2 (0.1) | 
+-----------------------+-------------------+-----------+-------------+ | | IIB | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | III | | 605 (18.1) | +-----------------------+-------------------+-----------+-------------+ | | IIIA | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | IIIC | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | IV | | 12 (0.4) | +-----------------------+-------------------+-----------+-------------+ | | IVA | | 1581 (47.3) | +-----------------------+-------------------+-----------+-------------+ | | IVB | | 309 (9.2) | +-----------------------+-------------------+-----------+-------------+ | | IVC | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | None | | 27 (0.8) | +-----------------------+-------------------+-----------+-------------+ | | X | | 6 (0.2) | +-----------------------+-------------------+-----------+-------------+ | Smoking Status, n (%) | Current | | 1139 (34.0) | +-----------------------+-------------------+-----------+-------------+ | | Ex-smoker | | 1290 (38.6) | +-----------------------+-------------------+-----------+-------------+ | | Non-smoker | | 872 (26.1) | +-----------------------+-------------------+-----------+-------------+ | | unknown | | 45 (1.3) | +-----------------------+-------------------+-----------+-------------+ | Disease Site, n (%) | benign tumor | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | esophagus | | 33 (1.0) | +-----------------------+-------------------+-----------+-------------+ | | hypopharynx | | 162 (4.8) | +-----------------------+-------------------+-----------+-------------+ | | lacrimal gland | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | larynx | | 877 (26.2) | +-----------------------+-------------------+-----------+-------------+ | | lip & 
oral cavity | | 100 (3.0) | +-----------------------+-------------------+-----------+-------------+ | | nasal cavity | | 62 (1.9) | +-----------------------+-------------------+-----------+-------------+ | | nasopharynx | | 355 (10.6) | +-----------------------+-------------------+-----------+-------------+ | | orbit | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | oropharynx | | 1501 (44.9) | +-----------------------+-------------------+-----------+-------------+ | | other | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | paraganglioma | | 7 (0.2) | +-----------------------+-------------------+-----------+-------------+ | | paranasal sinus | | 28 (0.8) | +-----------------------+-------------------+-----------+-------------+ | | salivary glands | | 4 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | sarcoma | | 20 (0.6) | +-----------------------+-------------------+-----------+-------------+ | | skin | | 24 (0.7) | +-----------------------+-------------------+-----------+-------------+ | | unknown | | 168 (5.0) | +-----------------------+-------------------+-----------+-------------+ | death, n (%) | 0 | | 2288 (68.4) | +-----------------------+-------------------+-----------+-------------+ | | 1 | | 1058 (31.6) | +-----------------------+-------------------+-----------+-------------+ | HPV Combined, n (%) | 1.0 | | 1139 (34.0) | +-----------------------+-------------------+-----------+-------------+ | | None | | 2207 (66.0) | +-----------------------+-------------------+-----------+-------------+ | Chemotherapy, n (%) | 0 | | 1923 (57.5) | +-----------------------+-------------------+-----------+-------------+ | | 1 | | 1423 (42.5) | +-----------------------+-------------------+-----------+-------------+ Outlier Report: - No Outliers found in Sex - Outliers found in T Stage: ['nan: 12 out of 3346', 'T2b: 5 out of 3346', 'T2a: 4 out of 3346', 'TX: 4 
out of 3346', 'T3 (2): 3 out of 3346', 'T2 (2): 1 out of 3346', 'T1 (2): 1 out of 3346', 'rT0: 1 out of 3346'] - Outliers found in N Stage: ['N3b: 28 out of 3346', 'N3a: 13 out of 3346', 'nan: 13 out of 3346', 'NX: 1 out of 3346'] - Outliers found in Stage: ['nan: 27 out of 3346', 'IV: 12 out of 3346', 'X: 6 out of 3346', 'IIA: 2 out of 3346', 'IIIA: 2 out of 3346', 'IIIC: 2 out of 3346', 'IVC: 2 out of 3346', 'IB: 1 out of 3346', 'IIB: 1 out of 3346'] - No Outliers found in Smoking Status - Outliers found in Disease Site: ['paranasal sinus: 28 out of 3346', 'skin: 24 out of 3346', 'sarcoma: 20 out of 3346', 'paraganglioma: 7 out of 3346', 'salivary glands: 4 out of 3346', 'other: 2 out of 3346', 'benign tumor: 1 out of 3346', 'lacrimal gland: 1 out of 3346', 'orbit: 1 out of 3346'] - No Outliers found in death - No Outliers found in HPV Combined - No Outliers found in Chemotherapy
[info ] Plotting Pairplot... [jarvais] call=visualization.__call__:125 15:28:56 [info ] Plotting UMAP... [jarvais] call=visualization.__call__:131 15:29:06 [info ] Plotting Frequency Table... [jarvais] call=visualization.__call__:128 15:29:15 [info ] Plotting Multiplot... [jarvais] call=visualization.__call__:146 15:29:30 [info ] Computing statistical ranking for dashboard... [jarvais] call=dashboard.__call__:77 [info ] Analyzing statistical significance for 9 categorical × 2 continuous variables [jarvais] call=statistical_ranking.find_top_multiplots:73 [info ] Found 18 total comparisons, 16 significant (p < 0.05) [jarvais] call=statistical_ranking.find_top_multiplots:126 [info ] Most significant p-value: 0.00e+00 [jarvais] call=statistical_ranking.find_top_multiplots:131 [info ] Generating dashboard plot of significant multiplots... [jarvais] call=dashboard.__call__:98 Font MPDFAA+Inter28ptBold is missing the following glyphs: ' ' (\n)
In [4]:
Copied!
from jarvais.trainer import TrainerSupervised

# Keyword arguments for the supervised trainer, gathered in one place.
trainer_options = {
    "output_dir": "./outputs/trainer",
    "target_variable": "death",
    "task": "binary",
    "k_folds": 2,
}
trainer = TrainerSupervised(**trainer_options)
print(trainer)

# Cast the target column to int on the analyzer's data (this writes back to
# analyzer.data), then train on that same frame.
analyzer.data['death'] = analyzer.data['death'].astype(int)
trainer.run(analyzer.data)
from jarvais.trainer import TrainerSupervised

# Keyword arguments for the supervised trainer, gathered in one place.
trainer_options = {
    "output_dir": "./outputs/trainer",
    "target_variable": "death",
    "task": "binary",
    "k_folds": 2,
}
trainer = TrainerSupervised(**trainer_options)
print(trainer)

# Cast the target column to int on the analyzer's data (this writes back to
# analyzer.data), then train on that same frame.
analyzer.data['death'] = analyzer.data['death'].astype(int)
trainer.run(analyzer.data)
15:29:40 [warning ] One-hot encoding is disabled for binary and multiclass tasks due to autogluon's OneHotEncoder implementation. If you want to use one-hot encoding, edit the trainer settings manually. [jarvais] call=trainer.__init__:54
TrainerSupervised( TrainerSettings( output_dir=PosixPath('outputs/trainer'), target_variable='death', task='binary', stratify_on=None, test_size=0.2, random_state=42, explain=False, encoding_module=OneHotEncodingModule(columns=None, prefix_sep='|', enabled=False), reduction_module=FeatureReductionModule(method=None, task='binary', keep_k=2, enabled=True), trainer_module=AutogluonTabularWrapper( output_dir=PosixPath('outputs/trainer'), target_variable='death', task='binary', eval_metric='roc_auc', k_folds=2, extra_metrics=['f1', 'auprc'], kwargs={} ) ) )
[warning ] One-hot encoding is disabled. [jarvais] call=encoding.__call__:34 [info ] Skipping feature reduction. [jarvais] call=feature_reduction.__call__:39 [info ] Training fold 1/2... [jarvais] call=autogluon_trainer._train_autogluon_with_cv:194 15:30:12 [info ] Fold 1/2 score: 0.7761862315751399 (roc_auc) [jarvais] call=autogluon_trainer._train_autogluon_with_cv:211 [info ] Training fold 2/2... [jarvais] call=autogluon_trainer._train_autogluon_with_cv:194 15:30:39 [info ] Fold 2/2 score: 0.750053611337183 (roc_auc) [jarvais] call=autogluon_trainer._train_autogluon_with_cv:211
Model Leaderboard ---------------- +-----------------------+----------------------------+----------------------------+----------------------------+ | model | score_train | score_val | score_test | +=======================+============================+============================+============================+ | NeuralNetTorch | ROC_AUC: 0.79 [0.78, 0.79] | ROC_AUC: 0.8 [0.78, 0.83] | ROC_AUC: 0.76 [0.75, 0.76] | | | F1: 0.57 [0.56, 0.58] | F1: 0.6 [0.57, 0.62] | F1: 0.54 [0.52, 0.55] | | | AUPRC: 0.65 [0.64, 0.66] | AUPRC: 0.67 [0.65, 0.69] | AUPRC: 0.62 [0.61, 0.63] | +-----------------------+----------------------------+----------------------------+----------------------------+ | WeightedEnsemble_L2 | ROC_AUC: 0.82 [0.81, 0.83] | ROC_AUC: 0.82 [0.78, 0.87] | ROC_AUC: 0.75 [0.74, 0.76] | | | F1: 0.56 [0.55, 0.58] | F1: 0.56 [0.5, 0.63] | F1: 0.48 [0.48, 0.48] | | | AUPRC: 0.68 [0.67, 0.68] | AUPRC: 0.67 [0.61, 0.74] | AUPRC: 0.6 [0.6, 0.6] | +-----------------------+----------------------------+----------------------------+----------------------------+ | CatBoost | ROC_AUC: 0.8 [0.8, 0.8] | ROC_AUC: 0.82 [0.78, 0.86] | ROC_AUC: 0.75 [0.73, 0.76] | | | F1: 0.56 [0.55, 0.56] | F1: 0.57 [0.52, 0.63] | F1: 0.49 [0.48, 0.49] | | | AUPRC: 0.66 [0.66, 0.66] | AUPRC: 0.67 [0.63, 0.71] | AUPRC: 0.6 [0.6, 0.6] | +-----------------------+----------------------------+----------------------------+----------------------------+ | LightGBMXT | ROC_AUC: 0.8 [0.79, 0.81] | ROC_AUC: 0.81 [0.78, 0.84] | ROC_AUC: 0.74 [0.74, 0.75] | | | F1: 0.55 [0.54, 0.56] | F1: 0.56 [0.54, 0.58] | F1: 0.45 [0.45, 0.45] | | | AUPRC: 0.66 [0.65, 0.67] | AUPRC: 0.66 [0.64, 0.68] | AUPRC: 0.57 [0.57, 0.57] | +-----------------------+----------------------------+----------------------------+----------------------------+ | SimpleRegressionModel | ROC_AUC: 0.77 [0.77, 0.77] | ROC_AUC: 0.79 [0.78, 0.79] | ROC_AUC: 0.73 [0.73, 0.74] | | | F1: 0.52 [0.51, 0.52] | F1: 0.54 [0.53, 0.56] | F1: 0.47 [0.46, 0.48] 
| | | AUPRC: 0.63 [0.63, 0.63] | AUPRC: 0.65 [0.64, 0.66] | AUPRC: 0.59 [0.58, 0.6] | +-----------------------+----------------------------+----------------------------+----------------------------+ | NeuralNetFastAI | ROC_AUC: 0.77 [0.77, 0.77] | ROC_AUC: 0.79 [0.75, 0.82] | ROC_AUC: 0.73 [0.73, 0.74] | | | F1: 0.54 [0.53, 0.55] | F1: 0.56 [0.52, 0.59] | F1: 0.46 [0.45, 0.47] | | | AUPRC: 0.64 [0.64, 0.64] | AUPRC: 0.66 [0.62, 0.69] | AUPRC: 0.58 [0.57, 0.59] | +-----------------------+----------------------------+----------------------------+----------------------------+ | XGBoost | ROC_AUC: 0.81 [0.8, 0.82] | ROC_AUC: 0.81 [0.75, 0.88] | ROC_AUC: 0.73 [0.72, 0.74] | | | F1: 0.56 [0.53, 0.58] | F1: 0.56 [0.5, 0.62] | F1: 0.45 [0.45, 0.45] | | | AUPRC: 0.67 [0.65, 0.68] | AUPRC: 0.66 [0.6, 0.72] | AUPRC: 0.57 [0.56, 0.58] | +-----------------------+----------------------------+----------------------------+----------------------------+ | LightGBM | ROC_AUC: 0.82 [0.82, 0.82] | ROC_AUC: 0.82 [0.75, 0.89] | ROC_AUC: 0.73 [0.71, 0.75] | | | F1: 0.57 [0.55, 0.59] | F1: 0.58 [0.51, 0.66] | F1: 0.45 [0.44, 0.45] | | | AUPRC: 0.68 [0.67, 0.69] | AUPRC: 0.69 [0.62, 0.76] | AUPRC: 0.57 [0.56, 0.58] | +-----------------------+----------------------------+----------------------------+----------------------------+ | RandomForestEntr | ROC_AUC: 0.88 [0.87, 0.89] | ROC_AUC: 0.86 [0.75, 0.97] | ROC_AUC: 0.71 [0.7, 0.73] | | | F1: 0.72 [0.71, 0.72] | F1: 0.71 [0.52, 0.91] | F1: 0.49 [0.48, 0.5] | | | AUPRC: 0.77 [0.77, 0.77] | AUPRC: 0.77 [0.61, 0.93] | AUPRC: 0.58 [0.57, 0.59] | +-----------------------+----------------------------+----------------------------+----------------------------+ | RandomForestGini | ROC_AUC: 0.88 [0.87, 0.89] | ROC_AUC: 0.86 [0.75, 0.97] | ROC_AUC: 0.71 [0.69, 0.72] | | | F1: 0.72 [0.71, 0.73] | F1: 0.72 [0.52, 0.91] | F1: 0.48 [0.47, 0.49] | | | AUPRC: 0.77 [0.77, 0.78] | AUPRC: 0.77 [0.61, 0.93] | AUPRC: 0.57 [0.57, 0.58] | 
+-----------------------+----------------------------+----------------------------+----------------------------+ | ExtraTreesGini | ROC_AUC: 0.87 [0.86, 0.88] | ROC_AUC: 0.86 [0.75, 0.97] | ROC_AUC: 0.7 [0.69, 0.71] | | | F1: 0.71 [0.7, 0.72] | F1: 0.71 [0.51, 0.91] | F1: 0.49 [0.47, 0.51] | | | AUPRC: 0.76 [0.75, 0.77] | AUPRC: 0.76 [0.59, 0.92] | AUPRC: 0.58 [0.56, 0.59] | +-----------------------+----------------------------+----------------------------+----------------------------+ | ExtraTreesEntr | ROC_AUC: 0.87 [0.86, 0.88] | ROC_AUC: 0.86 [0.75, 0.96] | ROC_AUC: 0.7 [0.69, 0.71] | | | F1: 0.71 [0.7, 0.72] | F1: 0.72 [0.52, 0.91] | F1: 0.47 [0.46, 0.49] | | | AUPRC: 0.76 [0.76, 0.77] | AUPRC: 0.77 [0.6, 0.93] | AUPRC: 0.56 [0.55, 0.58] | +-----------------------+----------------------------+----------------------------+----------------------------+ | LightGBMLarge | ROC_AUC: 0.83 [0.82, 0.83] | ROC_AUC: 0.81 [0.69, 0.94] | ROC_AUC: 0.69 [0.67, 0.72] | | | F1: 0.58 [0.57, 0.59] | F1: 0.58 [0.38, 0.77] | F1: 0.36 [0.36, 0.37] | | | AUPRC: 0.7 [0.68, 0.71] | AUPRC: 0.68 [0.52, 0.85] | AUPRC: 0.5 [0.49, 0.52] | +-----------------------+----------------------------+----------------------------+----------------------------+ | KNeighborsUnif | ROC_AUC: 0.67 [0.66, 0.68] | ROC_AUC: 0.66 [0.59, 0.73] | ROC_AUC: 0.61 [0.6, 0.62] | | | F1: 0.42 [0.39, 0.44] | F1: 0.42 [0.37, 0.46] | F1: 0.37 [0.35, 0.38] | | | AUPRC: 0.54 [0.52, 0.55] | AUPRC: 0.53 [0.49, 0.58] | AUPRC: 0.49 [0.48, 0.5] | +-----------------------+----------------------------+----------------------------+----------------------------+ | KNeighborsDist | ROC_AUC: 0.73 [0.73, 0.73] | ROC_AUC: 0.72 [0.55, 0.89] | ROC_AUC: 0.57 [0.54, 0.59] | | | F1: 0.51 [0.5, 0.52] | F1: 0.5 [0.32, 0.68] | F1: 0.35 [0.33, 0.38] | | | AUPRC: 0.61 [0.61, 0.62] | AUPRC: 0.6 [0.44, 0.76] | AUPRC: 0.47 [0.45, 0.49] | +-----------------------+----------------------------+----------------------------+----------------------------+
In [5]:
Copied!
from jarvais.explainer import Explainer

# Columns passed to the Explainer as sensitive features.
sensitive_features = ['N Stage', 'Disease Site', 'Sex']

explainer = Explainer(
    output_dir="./outputs/explainer",
    sensitive_features=sensitive_features,
)

# Run the explainer against the trainer fitted in the previous cell.
explainer.run(trainer)
from jarvais.explainer import Explainer

# Columns passed to the Explainer as sensitive features.
sensitive_features = ['N Stage', 'Disease Site', 'Sex']

explainer = Explainer(
    output_dir="./outputs/explainer",
    sensitive_features=sensitive_features,
)

# Run the explainer against the trainer fitted in the previous cell.
explainer.run(trainer)
15:30:42 [info ] Running Bias Audit Module... [jarvais] call=bias_audit.__call__:57
⚠️ **Possible Bias Detected in N Stage** ⚠️
=== Subgroup Analysis for 'N Stage' Using OLS Regression ===
Model Statistics:
R-squared: 0.031
F-statistic: 3.055
F-statistic p-value: 0.0036
AIC: 874.42
Log-Likelihood: -429.21
Model Coefficients:
+---------------+---------------+------------------+
| Feature | Coefficient | Standard Error |
+===============+===============+==================+
| const | 0.457 | 0.024 |
+---------------+---------------+------------------+
| N Stage_N0 | 0.118 | 0.035 |
+---------------+---------------+------------------+
| N Stage_N1 | 0.085 | 0.055 |
+---------------+---------------+------------------+
| N Stage_N2 | -0.196 | 0.071 |
+---------------+---------------+------------------+
| N Stage_N2a | -0.050 | 0.081 |
+---------------+---------------+------------------+
| N Stage_N2b | 0.053 | 0.042 |
+---------------+---------------+------------------+
| N Stage_N2c | 0.150 | 0.046 |
+---------------+---------------+------------------+
| N Stage_N3 | 0.180 | 0.082 |
+---------------+---------------+------------------+
| N Stage_Other | 0.117 | 0.125 |
+---------------+---------------+------------------+
=== Subgroup Analysis for 'N Stage' using FairLearn ===
+---------------------+-----------+-----------+-----------+-----------+
| | N0 | N1 | N2 | N2a |
+=====================+===========+===========+===========+===========+
| mean_prediction | 0.139442 | 0.238806 | 0.0540541 | 0.107143 |
+---------------------+-----------+-----------+-----------+-----------+
| false_positive_rate | 0.0971429 | 0.0909091 | 0.030303 | 0.0454545 |
+---------------------+-----------+-----------+-----------+-----------+
⚠️ **Possible Bias Detected in Disease Site** ⚠️
=== Subgroup Analysis for 'Disease Site' Using OLS Regression ===
Model Statistics:
R-squared: 0.032
F-statistic: 2.719
F-statistic p-value: 0.0059
AIC: 876.02
Log-Likelihood: -429.01
Model Coefficients:
+--------------------------------+---------------+------------------+
| Feature | Coefficient | Standard Error |
+================================+===============+==================+
| const | 0.521 | 0.030 |
+--------------------------------+---------------+------------------+
| Disease Site_Other | 0.246 | 0.108 |
+--------------------------------+---------------+------------------+
| Disease Site_esophagus | 0.235 | 0.159 |
+--------------------------------+---------------+------------------+
| Disease Site_hypopharynx | 0.147 | 0.080 |
+--------------------------------+---------------+------------------+
| Disease Site_larynx | 0.028 | 0.043 |
+--------------------------------+---------------+------------------+
| Disease Site_lip & oral cavity | 0.142 | 0.102 |
+--------------------------------+---------------+------------------+
| Disease Site_nasal cavity | -0.023 | 0.123 |
+--------------------------------+---------------+------------------+
| Disease Site_nasopharynx | -0.140 | 0.058 |
+--------------------------------+---------------+------------------+
| Disease Site_oropharynx | 0.034 | 0.039 |
+--------------------------------+---------------+------------------+
| Disease Site_unknown | -0.149 | 0.081 |
+--------------------------------+---------------+------------------+
15:30:44 [info ] Running Visualization Module... [jarvais] call=interpretation.__call__:38
=== Subgroup Analysis for 'Disease Site' using FairLearn ===
+---------------------+----------+-------------+---------------+----------+
| | Other | esophagus | hypopharynx | larynx |
+=====================+==========+=============+===============+==========+
| mean_prediction | 0.1875 | 0.142857 | 0.548387 | 0.189189 |
+---------------------+----------+-------------+---------------+----------+
| false_positive_rate | 0.142857 | 0 | 0.444444 | 0.139706 |
+---------------------+----------+-------------+---------------+----------+
Since target_class not specified, SHAP will explain predictions for each class
100%|██████████| 100/100 [04:32<00:00, 2.72s/it] 15:35:41 [info ] Running Feature Importance Module... [jarvais] call=importance.__call__:25