Regression
In [1]:
Copied!
import sys
import pandas as pd
import os
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
sys.path.append(project_root)
data_dir = os.path.join(project_root, 'data')
data_file_path = os.path.join(data_dir, 'RADCURE_processed_clinical.csv')
df = pd.read_csv(data_file_path, index_col=0)
df.drop(columns=["Study ID", "survival_time", "death"], inplace=True)
import sys
import pandas as pd
import os
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
sys.path.append(project_root)
data_dir = os.path.join(project_root, 'data')
data_file_path = os.path.join(data_dir, 'RADCURE_processed_clinical.csv')
df = pd.read_csv(data_file_path, index_col=0)
df.drop(columns=["Study ID", "survival_time", "death"], inplace=True)
In [2]:
Copied!
from jarvais.analyzer import Analyzer
from rich import print
analyzer = Analyzer(
data=df,
output_dir='./outputs/analyzer',
categorical_columns= [
"Sex",
"T Stage",
"N Stage",
"Stage",
"Smoking Status",
"Disease Site",
"HPV Combined",
"Chemotherapy",
],
continuous_columns = [
"age at dx",
"Dose"
],
target_variable='Dose',
task='classification'
)
print(analyzer)
analyzer.run()
from jarvais.analyzer import Analyzer
from rich import print
analyzer = Analyzer(
data=df,
output_dir='./outputs/analyzer',
categorical_columns= [
"Sex",
"T Stage",
"N Stage",
"Stage",
"Smoking Status",
"Disease Site",
"HPV Combined",
"Chemotherapy",
],
continuous_columns = [
"age at dx",
"Dose"
],
target_variable='Dose',
task='classification'
)
print(analyzer)
analyzer.run()
/home/joshua-siraj/Documents/CDI/jarvais/.pixi/envs/dev/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm 16:21:23 [warning ] Date columns not specified. Inferring from remaining columns. [jarvais] call=analyzer.__init__:85
Analyzer( AnalyzerSettings( output_dir=PosixPath('outputs/analyzer'), categorical_columns=[ 'Sex', 'T Stage', 'N Stage', 'Stage', 'Smoking Status', 'Disease Site', 'HPV Combined', 'Chemotherapy' ], continuous_columns=['age at dx', 'Dose'], date_columns=[], task='classification', target_variable='Dose', generate_report=True, settings_path=None, settings_schema_path=None, missingness=MissingnessModule( enabled=True, categorical_strategy={ 'Sex': 'unknown', 'T Stage': 'unknown', 'N Stage': 'unknown', 'Stage': 'unknown', 'Smoking Status': 'unknown', 'Disease Site': 'unknown', 'HPV Combined': 'unknown', 'Chemotherapy': 'unknown' }, continuous_strategy={'age at dx': 'median', 'Dose': 'median'} ), outlier=OutlierModule( enabled=True, categorical_strategy={ 'Sex': 'frequency', 'T Stage': 'frequency', 'N Stage': 'frequency', 'Stage': 'frequency', 'Smoking Status': 'frequency', 'Disease Site': 'frequency', 'HPV Combined': 'frequency', 'Chemotherapy': 'frequency' }, continuous_strategy={'age at dx': 'none', 'Dose': 'none'}, threshold=0.01, categorical_mapping={}, group_outliers=True ), visualization=DataVisualizationModule( enabled=True, plots=['corr', 'pairplot', 'umap', 'frequency_table', 'multiplot'], save_to_json=False ), boolean=BooleanEncodingModule(enabled=True, columns=[]), dashboard=DashboardModule(enabled=True, n_top=10, significance_threshold=0.05) ) )
[info ] Performing missingness analysis... [jarvais] call=missingness.__call__:40 [info ] Performing outlier analysis... [jarvais] call=outlier.__call__:63 [info ] Plotting Correlation Matrix... [jarvais] call=visualization.__call__:122
+-----------------------+-------------------+-----------+-------------+ | | | Missing | Overall | +=======================+===================+===========+=============+ | n | | | 3346 | +-----------------------+-------------------+-----------+-------------+ | age at dx, mean (SD) | | 0 | 62.3 (11.6) | +-----------------------+-------------------+-----------+-------------+ | Dose, mean (SD) | | 0 | 66.7 (5.8) | +-----------------------+-------------------+-----------+-------------+ | Sex, n (%) | Female | | 686 (20.5) | +-----------------------+-------------------+-----------+-------------+ | | Male | | 2660 (79.5) | +-----------------------+-------------------+-----------+-------------+ | T Stage, n (%) | None | | 12 (0.4) | +-----------------------+-------------------+-----------+-------------+ | | T0 | | 167 (5.0) | +-----------------------+-------------------+-----------+-------------+ | | T1 | | 454 (13.6) | +-----------------------+-------------------+-----------+-------------+ | | T1 (2) | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | T1a | | 179 (5.3) | +-----------------------+-------------------+-----------+-------------+ | | T1b | | 88 (2.6) | +-----------------------+-------------------+-----------+-------------+ | | T2 | | 927 (27.7) | +-----------------------+-------------------+-----------+-------------+ | | T2 (2) | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | T2a | | 4 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | T2b | | 5 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | T3 | | 861 (25.7) | +-----------------------+-------------------+-----------+-------------+ | | T3 (2) | | 3 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | T4 | | 116 (3.5) | +-----------------------+-------------------+-----------+-------------+ | | T4a | | 358 (10.7) | +-----------------------+-------------------+-----------+-------------+ | | T4b | | 121 (3.6) | +-----------------------+-------------------+-----------+-------------+ | | TX | | 4 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | Tis | | 44 (1.3) | +-----------------------+-------------------+-----------+-------------+ | | rT0 | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | N Stage, n (%) | N0 | | 1147 (34.3) | +-----------------------+-------------------+-----------+-------------+ | | N1 | | 344 (10.3) | +-----------------------+-------------------+-----------+-------------+ | | N2 | | 182 (5.4) | +-----------------------+-------------------+-----------+-------------+ | | N2a | | 125 (3.7) | +-----------------------+-------------------+-----------+-------------+ | | N2b | | 791 (23.6) | +-----------------------+-------------------+-----------+-------------+ | | N2c | | 532 (15.9) | +-----------------------+-------------------+-----------+-------------+ | | N3 | | 170 (5.1) | +-----------------------+-------------------+-----------+-------------+ | | N3a | | 13 (0.4) | +-----------------------+-------------------+-----------+-------------+ | | N3b | | 28 (0.8) | +-----------------------+-------------------+-----------+-------------+ | | NX | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | None | | 13 (0.4) | +-----------------------+-------------------+-----------+-------------+ | Stage, n (%) | 0 | | 44 (1.3) | +-----------------------+-------------------+-----------+-------------+ | | I | | 352 (10.5) | +-----------------------+-------------------+-----------+-------------+ | | IB | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | II | | 400 (12.0) | +-----------------------+-------------------+-----------+-------------+ | | IIA | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | IIB | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | III | | 605 (18.1) | +-----------------------+-------------------+-----------+-------------+ | | IIIA | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | IIIC | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | IV | | 12 (0.4) | +-----------------------+-------------------+-----------+-------------+ | | IVA | | 1581 (47.3) | +-----------------------+-------------------+-----------+-------------+ | | IVB | | 309 (9.2) | +-----------------------+-------------------+-----------+-------------+ | | IVC | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | None | | 27 (0.8) | +-----------------------+-------------------+-----------+-------------+ | | X | | 6 (0.2) | +-----------------------+-------------------+-----------+-------------+ | Smoking Status, n (%) | Current | | 1139 (34.0) | +-----------------------+-------------------+-----------+-------------+ | | Ex-smoker | | 1290 (38.6) | +-----------------------+-------------------+-----------+-------------+ | | Non-smoker | | 872 (26.1) | +-----------------------+-------------------+-----------+-------------+ | | unknown | | 45 (1.3) | +-----------------------+-------------------+-----------+-------------+ | Disease Site, n (%) | benign tumor | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | esophagus | | 33 (1.0) | +-----------------------+-------------------+-----------+-------------+ | | hypopharynx | | 162 (4.8) | +-----------------------+-------------------+-----------+-------------+ | | lacrimal gland | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | larynx | | 877 (26.2) | +-----------------------+-------------------+-----------+-------------+ | | lip & oral cavity | | 100 (3.0) | +-----------------------+-------------------+-----------+-------------+ | | nasal cavity | | 62 (1.9) | +-----------------------+-------------------+-----------+-------------+ | | nasopharynx | | 355 (10.6) | +-----------------------+-------------------+-----------+-------------+ | | orbit | | 1 (0.0) | +-----------------------+-------------------+-----------+-------------+ | | oropharynx | | 1501 (44.9) | +-----------------------+-------------------+-----------+-------------+ | | other | | 2 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | paraganglioma | | 7 (0.2) | +-----------------------+-------------------+-----------+-------------+ | | paranasal sinus | | 28 (0.8) | +-----------------------+-------------------+-----------+-------------+ | | salivary glands | | 4 (0.1) | +-----------------------+-------------------+-----------+-------------+ | | sarcoma | | 20 (0.6) | +-----------------------+-------------------+-----------+-------------+ | | skin | | 24 (0.7) | +-----------------------+-------------------+-----------+-------------+ | | unknown | | 168 (5.0) | +-----------------------+-------------------+-----------+-------------+ | HPV Combined, n (%) | 1.0 | | 1139 (34.0) | +-----------------------+-------------------+-----------+-------------+ | | None | | 2207 (66.0) | +-----------------------+-------------------+-----------+-------------+ | Chemotherapy, n (%) | 0 | | 1923 (57.5) | +-----------------------+-------------------+-----------+-------------+ | | 1 | | 1423 (42.5) | +-----------------------+-------------------+-----------+-------------+ Outlier Report: - No Outliers found in Sex - Outliers found in T Stage: ['nan: 12 out of 3346', 'T2b: 5 out of 3346', 'T2a: 4 out of 3346', 'TX: 4 out of 3346', 'T3 (2): 3 out of 3346', 'T2 (2): 1 out of 3346', 'T1 (2): 1 out of 3346', 'rT0: 1 out of 3346'] - Outliers found in N Stage: ['N3b: 28 out of 3346', 'N3a: 13 out of 3346', 'nan: 13 out of 3346', 'NX: 1 out of 3346'] - Outliers found in Stage: ['nan: 27 out of 3346', 'IV: 12 out of 3346', 'X: 6 out of 3346', 'IIA: 2 out of 3346', 'IIIA: 2 out of 3346', 'IIIC: 2 out of 3346', 'IVC: 2 out of 3346', 'IB: 1 out of 3346', 'IIB: 1 out of 3346'] - No Outliers found in Smoking Status - Outliers found in Disease Site: ['paranasal sinus: 28 out of 3346', 'skin: 24 out of 3346', 'sarcoma: 20 out of 3346', 'paraganglioma: 7 out of 3346', 'salivary glands: 4 out of 3346', 'other: 2 out of 3346', 'benign tumor: 1 out of 3346', 'lacrimal gland: 1 out of 3346', 'orbit: 1 out of 3346'] - No Outliers found in HPV Combined - No Outliers found in Chemotherapy
[info ] Plotting Pairplot... [jarvais] call=visualization.__call__:125 16:21:24 [info ] Plotting UMAP... [jarvais] call=visualization.__call__:131 16:21:34 [info ] Plotting Frequency Table... [jarvais] call=visualization.__call__:128 16:21:42 [info ] Plotting Multiplot... [jarvais] call=visualization.__call__:146 16:21:58 [info ] Computing statistical ranking for dashboard... [jarvais] call=dashboard.__call__:77 [info ] Analyzing statistical significance for 8 categorical × 2 continuous variables [jarvais] call=statistical_ranking.find_top_multiplots:73 [info ] Found 16 total comparisons, 14 significant (p < 0.05) [jarvais] call=statistical_ranking.find_top_multiplots:126 [info ] Most significant p-value: 0.00e+00 [jarvais] call=statistical_ranking.find_top_multiplots:131 [info ] Generating dashboard plot of significant multiplots... [jarvais] call=dashboard.__call__:98 Font MPDFAA+Inter28ptBold is missing the following glyphs: ' ' (\n)
In [3]:
Copied!
from jarvais.trainer import TrainerSupervised
trainer = TrainerSupervised(
output_dir="./outputs/trainer",
target_variable="Dose",
task="regression",
k_folds=2
)
print(trainer)
trainer.run(analyzer.data)
from jarvais.trainer import TrainerSupervised
trainer = TrainerSupervised(
output_dir="./outputs/trainer",
target_variable="Dose",
task="regression",
k_folds=2
)
print(trainer)
trainer.run(analyzer.data)
16:22:07 [warning ] One-hot encoding is disabled for binary and multiclass tasks due to autogluon's OneHotEncoder implementation. If you want to use one-hot encoding, edit the trainer settings manually. [jarvais] call=trainer.__init__:54
TrainerSupervised( TrainerSettings( output_dir=PosixPath('outputs/trainer'), target_variable='Dose', task='regression', stratify_on=None, test_size=0.2, random_state=42, explain=False, encoding_module=OneHotEncodingModule(columns=None, prefix_sep='|', enabled=False), reduction_module=FeatureReductionModule(method=None, task='regression', keep_k=2, enabled=True), trainer_module=AutogluonTabularWrapper( output_dir=PosixPath('outputs/trainer'), target_variable='Dose', task='regression', eval_metric='r2', k_folds=2, extra_metrics=['root_mean_squared_error'], kwargs={} ) ) )
[warning ] One-hot encoding is disabled. [jarvais] call=encoding.__call__:34 [info ] Skipping feature reduction. [jarvais] call=feature_reduction.__call__:39 [info ] Training fold 1/2... [jarvais] call=autogluon_trainer._train_autogluon_with_cv:194 16:23:11 [info ] Fold 1/2 score: 0.5817623676725117 (r2) [jarvais] call=autogluon_trainer._train_autogluon_with_cv:211 [info ] Training fold 2/2... [jarvais] call=autogluon_trainer._train_autogluon_with_cv:194 16:23:47 [info ] Fold 2/2 score: 0.5929594381641264 (r2) [jarvais] call=autogluon_trainer._train_autogluon_with_cv:211
Model Leaderboard ---------------- +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | model | score_train | score_val | score_test | +=======================+===============================================+===============================================+===============================================+ | CatBoost | R2: 0.62 [0.62, 0.62] | R2: 0.63 [0.6, 0.67] | R2: 0.6 [0.6, 0.6] | | | ROOT_MEAN_SQUARED_ERROR: -3.54 [-3.54, -3.53] | ROOT_MEAN_SQUARED_ERROR: -3.55 [-3.72, -3.37] | ROOT_MEAN_SQUARED_ERROR: -3.66 [-3.66, -3.66] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | WeightedEnsemble_L2 | R2: 0.63 [0.63, 0.64] | R2: 0.63 [0.59, 0.67] | R2: 0.6 [0.59, 0.6] | | | ROOT_MEAN_SQUARED_ERROR: -3.49 [-3.5, -3.47] | ROOT_MEAN_SQUARED_ERROR: -3.54 [-3.73, -3.34] | ROOT_MEAN_SQUARED_ERROR: -3.66 [-3.68, -3.63] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | LightGBMXT | R2: 0.62 [0.61, 0.62] | R2: 0.63 [0.6, 0.67] | R2: 0.59 [0.57, 0.6] | | | ROOT_MEAN_SQUARED_ERROR: -3.56 [-3.58, -3.55] | ROOT_MEAN_SQUARED_ERROR: -3.54 [-3.71, -3.37] | ROOT_MEAN_SQUARED_ERROR: -3.71 [-3.77, -3.66] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | NeuralNetFastAI | R2: 0.61 [0.6, 0.61] | R2: 0.62 [0.58, 0.66] | R2: 0.58 [0.58, 0.58] | | | ROOT_MEAN_SQUARED_ERROR: -3.61 [-3.65, -3.57] | ROOT_MEAN_SQUARED_ERROR: -3.6 [-3.8, -3.39] | ROOT_MEAN_SQUARED_ERROR: -3.74 [-3.75, -3.73] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | LightGBM | R2: 0.64 [0.62, 0.66] | R2: 0.62 [0.56, 0.68] | R2: 0.57 [0.56, 0.57] | | | ROOT_MEAN_SQUARED_ERROR: -3.45 [-3.55, -3.35] | ROOT_MEAN_SQUARED_ERROR: -3.59 [-3.89, -3.29] | ROOT_MEAN_SQUARED_ERROR: -3.79 [-3.81, -3.78] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | ExtraTreesMSE | R2: 0.68 [0.68, 0.69] | R2: 0.69 [0.53, 0.85] | R2: 0.56 [0.55, 0.57] | | | ROOT_MEAN_SQUARED_ERROR: -3.25 [-3.28, -3.23] | ROOT_MEAN_SQUARED_ERROR: -3.12 [-3.99, -2.25] | ROOT_MEAN_SQUARED_ERROR: -3.82 [-3.87, -3.78] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | RandomForestMSE | R2: 0.68 [0.68, 0.68] | R2: 0.69 [0.54, 0.85] | R2: 0.55 [0.53, 0.57] | | | ROOT_MEAN_SQUARED_ERROR: -3.26 [-3.27, -3.24] | ROOT_MEAN_SQUARED_ERROR: -3.13 [-3.99, -2.28] | ROOT_MEAN_SQUARED_ERROR: -3.87 [-3.96, -3.77] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | XGBoost | R2: 0.64 [0.63, 0.65] | R2: 0.63 [0.52, 0.74] | R2: 0.54 [0.52, 0.55] | | | ROOT_MEAN_SQUARED_ERROR: -3.43 [-3.48, -3.38] | ROOT_MEAN_SQUARED_ERROR: -3.5 [-4.04, -2.97] | ROOT_MEAN_SQUARED_ERROR: -3.93 [-3.99, -3.87] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | NeuralNetTorch | R2: 0.52 [0.49, 0.55] | R2: 0.54 [0.52, 0.56] | R2: 0.51 [0.49, 0.53] | | | ROOT_MEAN_SQUARED_ERROR: -4.0 [-4.13, -3.87] | ROOT_MEAN_SQUARED_ERROR: -3.96 [-4.03, -3.88] | ROOT_MEAN_SQUARED_ERROR: -4.04 [-4.14, -3.94] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | LightGBMLarge | R2: 0.62 [0.61, 0.63] | R2: 0.62 [0.47, 0.76] | R2: 0.5 [0.5, 0.5] | | | ROOT_MEAN_SQUARED_ERROR: -3.54 [-3.59, -3.48] | ROOT_MEAN_SQUARED_ERROR: -3.55 [-4.25, -2.85] | ROOT_MEAN_SQUARED_ERROR: -4.08 [-4.09, -4.08] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | SimpleRegressionModel | R2: 0.4 [0.4, 0.4] | R2: 0.41 [0.4, 0.41] | R2: 0.4 [0.4, 0.41] | | | ROOT_MEAN_SQUARED_ERROR: -4.46 [-4.46, -4.46] | ROOT_MEAN_SQUARED_ERROR: -4.51 [-4.52, -4.5] | ROOT_MEAN_SQUARED_ERROR: -4.46 [-4.47, -4.45] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | KNeighborsDist | R2: -0.03 [-0.05, -0.02] | R2: -0.04 [-0.31, 0.24] | R2: -0.28 [-0.36, -0.21] | | | ROOT_MEAN_SQUARED_ERROR: -5.84 [-5.89, -5.8] | ROOT_MEAN_SQUARED_ERROR: -5.9 [-6.7, -5.11] | ROOT_MEAN_SQUARED_ERROR: -6.53 [-6.72, -6.35] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+ | KNeighborsUnif | R2: 0.04 [0.03, 0.06] | R2: 0.05 [-0.06, 0.16] | R2: -0.08 [-0.09, -0.08] | | | ROOT_MEAN_SQUARED_ERROR: -5.63 [-5.68, -5.58] | ROOT_MEAN_SQUARED_ERROR: -5.68 [-6.02, -5.35] | ROOT_MEAN_SQUARED_ERROR: -6.0 [-6.02, -5.99] | +-----------------------+-----------------------------------------------+-----------------------------------------------+-----------------------------------------------+
In [4]:
Copied!
from jarvais.explainer import Explainer
sensitive_features = ['N Stage', 'Disease Site', 'Sex']
explainer = Explainer(output_dir="./outputs/explainer", sensitive_features=sensitive_features)
explainer.run(trainer)
from jarvais.explainer import Explainer
sensitive_features = ['N Stage', 'Disease Site', 'Sex']
explainer = Explainer(output_dir="./outputs/explainer", sensitive_features=sensitive_features)
explainer.run(trainer)
16:23:49 [info ] Running Bias Audit Module... [jarvais] call=bias_audit.__call__:57
⚠️ **Possible Bias Detected in N Stage** ⚠️
=== Subgroup Analysis for 'N Stage' Using OLS Regression ===
Model Statistics:
R-squared: 0.185
F-statistic: 21.426
F-statistic p-value: 0.0000
AIC: 3136.36
Log-Likelihood: -1560.18
Model Coefficients:
+---------------+---------------+------------------+
| Feature | Coefficient | Standard Error |
+===============+===============+==================+
| const | 1.973 | 0.135 |
+---------------+---------------+------------------+
| N Stage_N0 | 1.967 | 0.200 |
+---------------+---------------+------------------+
| N Stage_N1 | 0.792 | 0.319 |
+---------------+---------------+------------------+
| N Stage_N2 | -1.354 | 0.370 |
+---------------+---------------+------------------+
| N Stage_N2a | 0.345 | 0.438 |
+---------------+---------------+------------------+
| N Stage_N2b | -0.496 | 0.215 |
+---------------+---------------+------------------+
| N Stage_N2c | -0.080 | 0.257 |
+---------------+---------------+------------------+
| N Stage_N3 | -1.069 | 0.391 |
+---------------+---------------+------------------+
| N Stage_Other | 1.867 | 0.747 |
+---------------+---------------+------------------+
=== Subgroup Analysis for 'N Stage' using FairLearn ===
+-----------------+--------+---------+---------+---------+
| | N0 | N1 | N2 | N2a |
+=================+========+=========+=========+=========+
| mean_prediction | 63.368 | 67.9398 | 69.9615 | 68.3965 |
+-----------------+--------+---------+---------+---------+
⚠️ **Possible Bias Detected in Disease Site** ⚠️
=== Subgroup Analysis for 'Disease Site' Using OLS Regression ===
Model Statistics:
R-squared: 0.199
F-statistic: 20.589
F-statistic p-value: 0.0000
AIC: 3126.11
Log-Likelihood: -1554.06
Model Coefficients:
+--------------------------------+---------------+------------------+
| Feature | Coefficient | Standard Error |
+================================+===============+==================+
| const | 2.945 | 0.154 |
+--------------------------------+---------------+------------------+
| Disease Site_Other | 2.440 | 0.593 |
+--------------------------------+---------------+------------------+
| Disease Site_esophagus | 3.561 | 0.799 |
+--------------------------------+---------------+------------------+
| Disease Site_hypopharynx | -0.843 | 0.400 |
+--------------------------------+---------------+------------------+
| Disease Site_larynx | 0.716 | 0.231 |
+--------------------------------+---------------+------------------+
| Disease Site_lip & oral cavity | 2.104 | 0.508 |
+--------------------------------+---------------+------------------+
| Disease Site_nasal cavity | -1.011 | 0.612 |
+--------------------------------+---------------+------------------+
| Disease Site_nasopharynx | -2.398 | 0.306 |
+--------------------------------+---------------+------------------+
| Disease Site_oropharynx | -1.083 | 0.200 |
+--------------------------------+---------------+------------------+
| Disease Site_unknown | -0.543 | 0.379 |
+--------------------------------+---------------+------------------+
=== Subgroup Analysis for 'Disease Site' using FairLearn ===
+-----------------+---------+-------------+---------------+----------+
| | Other | esophagus | hypopharynx | larynx |
+=================+=========+=============+===============+==========+
| mean_prediction | 65.3827 | 66.3786 | 68.0047 | 62.7086 |
+-----------------+---------+-------------+---------------+----------+
⚠️ **Possible Bias Detected in Sex** ⚠️
=== Subgroup Analysis for 'Sex' Using OLS Regression ===
Model Statistics:
R-squared: 0.012
F-statistic: 4.219
F-statistic p-value: 0.0151
AIC: 3254.76
Log-Likelihood: -1624.38
Model Coefficients:
+------------+---------------------+--------------------+
| Feature | Coefficient | Standard Error |
+============+=====================+====================+
| const | 90656299303385.656 | 40069012658174.047 |
+------------+---------------------+--------------------+
| Sex_Female | -90656299303382.750 | 40069012658174.148 |
+------------+---------------------+--------------------+
| Sex_Male | -90656299303383.500 | 40069012658174.156 |
+------------+---------------------+--------------------+
16:23:51 [info ] Running Visualization Module... [jarvais] call=interpretation.__call__:38
=== Subgroup Analysis for 'Sex' using FairLearn ===
+-----------------+----------+---------+
| | Female | Male |
+=================+==========+=========+
| mean_prediction | 67.1106 | 66.9931 |
+-----------------+----------+---------+
16:23:58 [info ] Running Feature Importance Module... [jarvais] call=importance.__call__:25