! git clone https://github.com/predictive-clinical-neuroscience/PCNtoolkit-demo.git
Cloning into 'PCNtoolkit-demo'...
remote: Enumerating objects: 1237, done.
remote: Counting objects: 100% (360/360), done.
remote: Compressing objects: 100% (185/185), done.
remote: Total 1237 (delta 200), reused 306 (delta 172), pack-reused 877 (from 1)
Receiving objects: 100% (1237/1237), 141.45 MiB | 10.83 MiB/s, done.
Resolving deltas: 100% (562/562), done.
Updating files: 100% (70/70), done.
import os
os.chdir('/content/PCNtoolkit-demo/')
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats, linalg
from sklearn import preprocessing, decomposition, linear_model, metrics
import warnings
# set fontsizes for matplotlib plots
baseline_fontsize = 12
SMALL_SIZE = 8 + baseline_fontsize
MEDIUM_SIZE = 10 + baseline_fontsize
BIGGER_SIZE = 12 + baseline_fontsize

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

Load Data

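The arrays below hold, for 946 HCP Young Adult subjects, normative-model deviation (z) scores (hcp_z), regional cortical thickness values (hcp_ct), and general cognitive ability (g) scores (gscores); the g-scores are the prediction target throughout.
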
hcp_z = np.load('data/hcpya_z.npy')
hcp_ct = np.load('data/hcpya_ct.npy')
gscores = np.load('data/hcpya_g.npy')
print(hcp_z.shape)
print(hcp_ct.shape)
print(gscores.shape)
(946, 187)
(946, 151)
(946,)

Create Train/Test Splits

# generate train/test splits
np.random.seed(42)
n_train = int(0.8 * hcp_z.shape[0])

train_idxs = np.random.choice(range(hcp_z.shape[0]), size=n_train, replace=False)
test_idxs = np.array([x for x in range(hcp_z.shape[0]) if x not in train_idxs])
train_data_z = hcp_z[train_idxs, :]
test_data_z = hcp_z[test_idxs, :]

train_data_ct = hcp_ct[train_idxs, :]
test_data_ct = hcp_ct[test_idxs, :]

train_phen = gscores[train_idxs]
test_phen = gscores[test_idxs]
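
For reference, the same 80/20 split can be produced with scikit-learn's train_test_split helper (a minimal sketch; it consumes the RNG differently than np.random.choice, so the exact indices will differ from the manual split above):

from sklearn import model_selection

# equivalent 80/20 split via sklearn (indices differ from the manual split above)
tr_z, te_z, tr_g, te_g = model_selection.train_test_split(
    hcp_z, gscores, train_size=0.8, random_state=42)
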
# mean center train/test data (using train means)
# (note: these centered copies are not used again below; sklearn's PCA centers
# the data internally, and the bbs() CV function later re-centers within each fold)
train_mu_centered_z = (train_data_z - train_data_z.mean(axis=0))
test_mu_centered_z = (test_data_z - train_data_z.mean(axis=0))

train_mu_centered_ct = (train_data_ct - train_data_ct.mean(axis=0))
test_mu_centered_ct = (test_data_ct - train_data_ct.mean(axis=0))

Principal Component Regression (BBS)

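BBS (brain basis set) modeling reduces the brain features to their top principal components and then fits an ordinary least squares regression on the component scores.
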
# from the PCA documentation: "the input data is centered but not scaled for each feature before applying the SVD"
pca_model_z = decomposition.PCA(n_components=75).fit(train_data_z)
pca_model_ct = decomposition.PCA(n_components=75).fit(train_data_ct)
print(f'First PC explains {pca_model_z.explained_variance_ratio_[0]*100:.2f}% of the total variance.\nThis is an artifact of zero-inflated data')
plt.figure(figsize=(10, 7))
# skip the first PC (zero-inflation artifact) and plot the next 50 components
plt.bar(range(2, 52), pca_model_z.explained_variance_ratio_[1:51])
plt.title('Deviations model Variance Explained Ratio\nPCs 2-51', fontsize=25)
plt.show()
First PC explains 23.41% of the total variance.
This is an artifact of zero-inflated data
[Figure: bar plot of the variance explained ratio for the deviations model, PCs 2-51]
print(f'First PC explains {pca_model_ct.explained_variance_ratio_[0]*100:.2f}% of the total variance.\nThis is an artifact of zero-inflated data')
plt.figure(figsize=(10, 7))
# skip the first PC (zero-inflation artifact) and plot the next 50 components
plt.bar(range(2, 52), pca_model_ct.explained_variance_ratio_[1:51])
plt.title('Cortical Thickness model Variance Explained Ratio\nPCs 2-51', fontsize=25)
plt.show()
First PC explains 24.28% of the total variance.
This is an artifact of zero-inflated data
[Figure: bar plot of the variance explained ratio for the cortical thickness model, PCs 2-51]
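
One quick way to check the zero-inflation claim is to count exact zeros in each feature matrix (a minimal sketch using the arrays loaded above):

# fraction of entries that are exactly zero in each feature matrix
print(f'hcp_z: {np.mean(hcp_z == 0):.1%} zero entries')
print(f'hcp_ct: {np.mean(hcp_ct == 0):.1%} zero entries')
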
train_transformed_z = pca_model_z.transform(train_data_z)
test_transformed_z = pca_model_z.transform(test_data_z)
train_transformed_ct = pca_model_ct.transform(train_data_ct)
test_transformed_ct = pca_model_ct.transform(test_data_ct)

Fit Linear Regression Model

# fast OLS using matrix math
# we will check that this matches sklearn results later

# fit ols model on dimension reduced train data
train_features_z = np.hstack([np.ones((train_transformed_z.shape[0], 1)),
                            train_transformed_z])
train_features_inv_z = linalg.pinv(train_features_z)
train_betas_z = np.dot(train_features_inv_z, train_phen)
train_pred_phen_z = np.dot(train_features_z, train_betas_z)

# fit ols model on dimension reduced test data
test_features_z = np.hstack([np.ones((test_transformed_z.shape[0], 1)),
                           test_transformed_z])
test_pred_phen_z = np.dot(test_features_z, train_betas_z)
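
As a cross-check on the pseudoinverse solution, numpy's least-squares solver should recover the same coefficients (a minimal sketch):

# OLS betas via numpy's least-squares solver; should match the pinv solution above
betas_check, *_ = np.linalg.lstsq(train_features_z, train_phen, rcond=None)
assert np.allclose(betas_check, train_betas_z)
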
# fast OLS using matrix math
# we will check that this matches sklearn results later

# fit ols model on dimension reduced train data
train_features_ct = np.hstack([np.ones((train_transformed_ct.shape[0], 1)),
                            train_transformed_ct])
train_features_inv_ct = linalg.pinv(train_features_ct)
train_betas_ct = np.dot(train_features_inv_ct, train_phen)
train_pred_phen_ct = np.dot(train_features_ct, train_betas_ct)

# fit ols model on dimension reduced test data
test_features_ct = np.hstack([np.ones((test_transformed_ct.shape[0], 1)),
                           test_transformed_ct])
test_pred_phen_ct = np.dot(test_features_ct, train_betas_ct)
# OLS using sklearn

lr_model_z = linear_model.LinearRegression(fit_intercept=True)
lr_model_z.fit(train_transformed_z, train_phen)
train_pred_phen_lr_model_z = lr_model_z.predict(train_transformed_z)
test_pred_phen_lr_model_z = lr_model_z.predict(test_transformed_z)
# OLS using sklearn

lr_model_ct = linear_model.LinearRegression(fit_intercept=True)
lr_model_ct.fit(train_transformed_ct, train_phen)
train_pred_phen_lr_model_ct = lr_model_ct.predict(train_transformed_ct)
test_pred_phen_lr_model_ct = lr_model_ct.predict(test_transformed_ct)
# ensure matrix-math predictions and sklearn predictions agree to ~5 decimal places
assert np.allclose(train_pred_phen_z, train_pred_phen_lr_model_z, atol=1e-5), 'Failed'
assert np.allclose(test_pred_phen_z, test_pred_phen_lr_model_z, atol=1e-5), 'Failed'
print('Passed')
Passed
# ensure matrix-math predictions and sklearn predictions agree to ~5 decimal places
assert np.allclose(train_pred_phen_ct, train_pred_phen_lr_model_ct, atol=1e-5), 'Failed'
assert np.allclose(test_pred_phen_ct, test_pred_phen_lr_model_ct, atol=1e-5), 'Failed'
print('Passed')
Passed

Accuracy of Predictions

train_r2_z = metrics.r2_score(train_phen, train_pred_phen_lr_model_z)
train_mse_z = metrics.mean_squared_error(train_phen, train_pred_phen_lr_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_pred_phen_lr_model_z)
print(f'Deviation model Train R^2: {train_r2_z:.3f}')
print(f'Deviation model Train MSE: {train_mse_z:.3f}')
print(f'Deviation model Test MSE: {test_mse_z:.3f}')
Deviation model Train R^2: 0.255
Deviation model Train MSE: 0.532
Deviation model Test MSE: 0.741
train_r2_ct = metrics.r2_score(train_phen, train_pred_phen_lr_model_ct)
train_mse_ct = metrics.mean_squared_error(train_phen, train_pred_phen_lr_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_pred_phen_lr_model_ct)
print(f'Cortical thickness model Train R^2: {train_r2_ct:.3f}')
print(f'Cortical thickness model Train MSE: {train_mse_ct:.3f}')
print(f'Cortical thickness model Test MSE: {test_mse_ct:.3f}')
Cortical thickness model Train R^2: 0.185
Cortical thickness model Train MSE: 0.582
Cortical thickness model Test MSE: 0.830

BBS Cross Validation

def bbs(X, y, n_components, n_cv_splits, pred_summary_function, verbose=False):
    assert X.shape[0] == y.shape[0]

    fold_accs_train = []
    fold_accs_test = []
    np.random.seed(42)
    shuffled_idxs = np.random.choice(range(X.shape[0]), size=X.shape[0], replace=False)
    for fold_i, test_idxs in enumerate(np.array_split(shuffled_idxs, n_cv_splits)):
        train_mask = np.ones(X.shape[0], bool)
        train_mask[test_idxs] = 0

        # create train/test X, y
        train_X, test_X = X[train_mask, :], X[test_idxs, :]
        train_y, test_y = y[train_mask], y[test_idxs]

        # mean center columns using train data only
        train_X_mu = train_X.mean(axis=0)
        train_X = train_X - train_X_mu
        test_X = test_X - train_X_mu

        # fit pca
        if verbose:
            print(f'CV Fold: {fold_i+1:<10} Fitting PCA model...')
        pca_model = decomposition.PCA(n_components=n_components).fit(train_X)

        # dimension reduce train/test data
        train_X = pca_model.transform(train_X)
        test_X = pca_model.transform(test_X)

        # fit OLS model
        if verbose:
            print(f'CV Fold: {fold_i+1:<10} Fitting Linear Regression model...')
        lr_model = linear_model.LinearRegression(fit_intercept=True)
        lr_model.fit(train_X, train_y)

        train_pred = lr_model.predict(train_X)
        test_pred = lr_model.predict(test_X)

        fold_accs_train.append(pred_summary_function(train_y, train_pred))
        fold_accs_test.append(pred_summary_function(test_y, test_pred))

        if verbose:
            print(f'CV Fold: {fold_i+1:<10} Train Accuracy: {round(fold_accs_train[-1], 3):<10} Test Accuracy: {round(fold_accs_test[-1], 3):<10}')


    plt.figure(figsize=(13, 7))
    plt.plot(range(1, len(fold_accs_train)+1), fold_accs_train, linestyle='-', marker='o', color='C0', label='Train CV Performance')
    plt.plot(range(1, len(fold_accs_test)+1), fold_accs_test, linestyle='-', marker='o', color='C1', label='Test CV Performance')
    plt.title(pred_summary_function.__name__, fontsize=20)
    plt.xticks(range(1, len(fold_accs_test)+1))
    plt.xlabel('CV Fold')
    plt.legend(fontsize=20)
    plt.show()

    return fold_accs_train, fold_accs_test
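
The manual shuffle plus np.array_split above plays the same role as scikit-learn's KFold with shuffling; a minimal equivalent sketch (fold membership differs because the RNG is consumed differently):

from sklearn import model_selection

# equivalent 5-fold CV index generation
kf = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
for fold_i, (tr_idxs, te_idxs) in enumerate(kf.split(hcp_z)):
    print(f'fold {fold_i + 1}: {len(tr_idxs)} train, {len(te_idxs)} test')
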
fold_accs_train_z, fold_accs_test_z = bbs(hcp_z, gscores, n_components=75, n_cv_splits=5, pred_summary_function=metrics.mean_absolute_error, verbose=True)
CV Fold: 1          Fitting PCA model...
CV Fold: 1          Fitting Linear Regression model...
CV Fold: 1          Train Accuracy: 0.599      Test Accuracy: 0.619
CV Fold: 2          Fitting PCA model...
CV Fold: 2          Fitting Linear Regression model...
CV Fold: 2          Train Accuracy: 0.572      Test Accuracy: 0.713
CV Fold: 3          Fitting PCA model...
CV Fold: 3          Fitting Linear Regression model...
CV Fold: 3          Train Accuracy: 0.577      Test Accuracy: 0.687
CV Fold: 4          Fitting PCA model...
CV Fold: 4          Fitting Linear Regression model...
CV Fold: 4          Train Accuracy: 0.604      Test Accuracy: 0.608
CV Fold: 5          Fitting PCA model...
CV Fold: 5          Fitting Linear Regression model...
CV Fold: 5          Train Accuracy: 0.581      Test Accuracy: 0.687
[Figure: train/test mean_absolute_error across the 5 CV folds, BBS deviations model]
fold_accs_train_ct, fold_accs_test_ct = bbs(hcp_ct, gscores, n_components=75, n_cv_splits=5, pred_summary_function=metrics.mean_absolute_error, verbose=True)
CV Fold: 1          Fitting PCA model...
CV Fold: 1          Fitting Linear Regression model...
CV Fold: 1          Train Accuracy: 0.622      Test Accuracy: 0.643
CV Fold: 2          Fitting PCA model...
CV Fold: 2          Fitting Linear Regression model...
CV Fold: 2          Train Accuracy: 0.605      Test Accuracy: 0.723
CV Fold: 3          Fitting PCA model...
CV Fold: 3          Fitting Linear Regression model...
CV Fold: 3          Train Accuracy: 0.604      Test Accuracy: 0.701
CV Fold: 4          Fitting PCA model...
CV Fold: 4          Fitting Linear Regression model...
CV Fold: 4          Train Accuracy: 0.624      Test Accuracy: 0.646
CV Fold: 5          Fitting PCA model...
CV Fold: 5          Fitting Linear Regression model...
CV Fold: 5          Train Accuracy: 0.614      Test Accuracy: 0.722
[Figure: train/test mean_absolute_error across the 5 CV folds, BBS cortical thickness model]

Connectome Predictive Modelling

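CPM selects the features whose correlation with the phenotype passes a p-value threshold, splits them by correlation sign, sums each subject's selected features into one score per sign, and fits simple linear regressions on those summed scores.
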
# correlate each train brain feature with train_phenotype
train_z_pheno_corr_p = [stats.pearsonr(train_data_z[:, i], train_phen) for i in range(train_data_z.shape[1])]  # list of (r, p) pairs, one per feature
# there can be some nan correlations if brain data is poorly cropped (i.e., some columns are always 0)
# correlate each train brain feature with train_phenotype
train_ct_pheno_corr_p = [stats.pearsonr(train_data_ct[:, i], train_phen) for i in range(train_data_ct.shape[1])]  # list of (r, p) pairs, one per feature
# there can be some nan correlations if brain data is poorly cropped (i.e., some columns are always 0)
# split into positive and negative correlations
# and keep edges with p values below threshold
pval_threshold = 0.01

train_z_corrs = np.array([x[0] for x in train_z_pheno_corr_p])
train_z_pvals = np.array([x[1] for x in train_z_pheno_corr_p])

keep_edges_pos_z = (train_z_corrs > 0) & (train_z_pvals < pval_threshold)
keep_edges_neg_z = (train_z_corrs < 0) & (train_z_pvals < pval_threshold)

train_ct_corrs = np.array([x[0] for x in train_ct_pheno_corr_p])
train_ct_pvals = np.array([x[1] for x in train_ct_pheno_corr_p])

keep_edges_pos_ct = (train_ct_corrs > 0) & (train_ct_pvals < pval_threshold)
keep_edges_neg_ct = (train_ct_corrs < 0) & (train_ct_pvals < pval_threshold)
print(f'number of positive Z features kept = {np.sum(keep_edges_pos_z)}')
print(f'number of negative Z features kept = {np.sum(keep_edges_neg_z)}')
print(f'number of positive CT features kept = {np.sum(keep_edges_pos_ct)}')
print(f'number of negative CT features kept = {np.sum(keep_edges_neg_ct)}')
number of positive Z features kept = 37
number of negative Z features kept = 2
number of positive CT features kept = 15
number of negative CT features kept = 1
train_pos_edges_sum_z = train_data_z[:, keep_edges_pos_z].sum(1)
train_neg_edges_sum_z = train_data_z[:, keep_edges_neg_z].sum(1)
train_pos_edges_sum_ct = train_data_ct[:, keep_edges_pos_ct].sum(1)
train_neg_edges_sum_ct = train_data_ct[:, keep_edges_neg_ct].sum(1)
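
By construction the positive-edge sum should correlate positively with the training phenotype; a quick sanity-check sketch:

# the positive-edge score was built from positively correlated features,
# so its correlation with train_phen should be positive
r, p = stats.pearsonr(train_pos_edges_sum_z, train_phen)
print(f'positive-edge score vs. g: r = {r:.3f}, p = {p:.2g}')
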
fit_pos_z = linear_model.LinearRegression(fit_intercept=True).fit(train_pos_edges_sum_z.reshape(-1, 1), train_phen)
fit_neg_z = linear_model.LinearRegression(fit_intercept=True).fit(train_neg_edges_sum_z.reshape(-1, 1), train_phen)
fit_pos_ct = linear_model.LinearRegression(fit_intercept=True).fit(train_pos_edges_sum_ct.reshape(-1, 1), train_phen)
fit_neg_ct = linear_model.LinearRegression(fit_intercept=True).fit(train_neg_edges_sum_ct.reshape(-1, 1), train_phen)
pos_error_z = metrics.mean_absolute_error(train_phen, fit_pos_z.predict(train_pos_edges_sum_z.reshape(-1, 1)))
neg_error_z = metrics.mean_absolute_error(train_phen, fit_neg_z.predict(train_neg_edges_sum_z.reshape(-1, 1)))
pos_error_ct = metrics.mean_absolute_error(train_phen, fit_pos_ct.predict(train_pos_edges_sum_ct.reshape(-1, 1)))
neg_error_ct = metrics.mean_absolute_error(train_phen, fit_neg_ct.predict(train_neg_edges_sum_ct.reshape(-1, 1)))

print(f'Training Error (Positive Z Features Model) = {pos_error_z:.3f}')
print(f'Training Error (Negative Z Features Model) = {neg_error_z:.3f}')
print(f'Training Error (Positive CT Features Model) = {pos_error_ct:.3f}')
print(f'Training Error (Negative CT Features Model) = {neg_error_ct:.3f}')
Training Error (Positive Z Features Model) = 0.631
Training Error (Negative Z Features Model) = 0.666
Training Error (Positive CT Features Model) = 0.662
Training Error (Negative CT Features Model) = 0.665
# combine positive/negative edges in one linear regression model
fit_pos_neg_z = linear_model.LinearRegression(fit_intercept=True).fit(np.stack((train_pos_edges_sum_z, train_neg_edges_sum_z)).T, train_phen)
# combine positive/negative edges in one linear regression model
fit_pos_neg_ct = linear_model.LinearRegression(fit_intercept=True).fit(np.stack((train_pos_edges_sum_ct, train_neg_edges_sum_ct)).T, train_phen)
pos_neg_error_z = metrics.mean_absolute_error(train_phen, fit_pos_neg_z.predict(np.stack((train_pos_edges_sum_z, train_neg_edges_sum_z)).T))
pos_neg_error_ct = metrics.mean_absolute_error(train_phen, fit_pos_neg_ct.predict(np.stack((train_pos_edges_sum_ct, train_neg_edges_sum_ct)).T))

print(f'Training Error (Positive/Negative Z Features Model) = {pos_neg_error_z:.3f}')
print(f'Training Error (Positive/Negative CT Features Model) = {pos_neg_error_ct:.3f}')
Training Error (Positive/Negative Z Features Model) = 0.620
Training Error (Positive/Negative CT Features Model) = 0.642
# evaluate out of sample performance
test_pos_edges_sum_z = test_data_z[:, keep_edges_pos_z].sum(1)
test_neg_edges_sum_z = test_data_z[:, keep_edges_neg_z].sum(1)

pos_test_error_z = metrics.mean_absolute_error(test_phen, fit_pos_z.predict(test_pos_edges_sum_z.reshape(-1, 1)))
neg_test_error_z = metrics.mean_absolute_error(test_phen, fit_neg_z.predict(test_neg_edges_sum_z.reshape(-1, 1)))
pos_neg_test_error_z = metrics.mean_absolute_error(test_phen, fit_pos_neg_z.predict(np.stack((test_pos_edges_sum_z, test_neg_edges_sum_z)).T))

test_pos_edges_sum_ct = test_data_ct[:, keep_edges_pos_ct].sum(1)
test_neg_edges_sum_ct = test_data_ct[:, keep_edges_neg_ct].sum(1)

pos_test_error_ct = metrics.mean_absolute_error(test_phen, fit_pos_ct.predict(test_pos_edges_sum_ct.reshape(-1, 1)))
neg_test_error_ct = metrics.mean_absolute_error(test_phen, fit_neg_ct.predict(test_neg_edges_sum_ct.reshape(-1, 1)))
pos_neg_test_error_ct = metrics.mean_absolute_error(test_phen, fit_pos_neg_ct.predict(np.stack((test_pos_edges_sum_ct, test_neg_edges_sum_ct)).T))

print(f'Testing Error (Positive Z Features Model) = {pos_test_error_z:.3f}')
print(f'Testing Error (Negative Z Features Model) = {neg_test_error_z:.3f}')
print(f'Testing Error (Positive/Negative Z Features Model) = {pos_neg_test_error_z:.3f}')
print(f'Testing Error (Positive CT Features Model) = {pos_test_error_ct:.3f}')
print(f'Testing Error (Negative CT Features Model) = {neg_test_error_ct:.3f}')
print(f'Testing Error (Positive/Negative CT Features Model) = {pos_neg_test_error_ct:.3f}')
Testing Error (Positive Z Features Model) = 0.705
Testing Error (Negative Z Features Model) = 0.696
Testing Error (Positive/Negative Z Features Model) = 0.697
Testing Error (Positive CT Features Model) = 0.710
Testing Error (Negative CT Features Model) = 0.695
Testing Error (Positive/Negative CT Features Model) = 0.701

CPM Cross Validation

def cpm(X, y, p_threshold, n_cv_splits, pred_summary_function, verbose=False):
    assert X.shape[0] == y.shape[0]

    fold_accs_train = []
    fold_accs_test = []
    np.random.seed(42)
    shuffled_idxs = np.random.choice(range(X.shape[0]), size=X.shape[0], replace=False)
    for fold_i, test_idxs in enumerate(np.array_split(shuffled_idxs, n_cv_splits)):
        train_mask = np.ones(X.shape[0], bool)
        train_mask[test_idxs] = 0

        # create train/test X, y
        train_X, test_X = X[train_mask, :], X[test_idxs, :]
        train_y, test_y = y[train_mask], y[test_idxs]

        # create correlation matrix between train_X and train_y
        if verbose:
            print(f'CV Fold: {fold_i+1:<10} Computing correlations between train_X and train_y...')
        with warnings.catch_warnings():
            # we expect pearsonr to throw PearsonRConstantInputWarning because of constant-valued columns in X
            warnings.simplefilter("ignore")
            train_pheno_corr_p = [stats.pearsonr(train_X[:, i], train_y) for i in range(train_X.shape[1])]
            train_corrs = np.array([x[0] for x in train_pheno_corr_p])
            train_pvals = np.array([x[1] for x in train_pheno_corr_p])
            # create masks for edges below p-threshold and split pos/neg correlations
            keep_edges_pos = (train_corrs > 0) & (train_pvals < p_threshold)
            keep_edges_neg = (train_corrs < 0) & (train_pvals < p_threshold)

        # sum X entries with significant correlations with y
        train_pos_edges_sum = train_X[:, keep_edges_pos].sum(1)
        train_neg_edges_sum = train_X[:, keep_edges_neg].sum(1)
        test_pos_edges_sum = test_X[:, keep_edges_pos].sum(1)
        test_neg_edges_sum = test_X[:, keep_edges_neg].sum(1)

        # fit linear regression models based on summed values
        fit_pos = linear_model.LinearRegression(fit_intercept=True).fit(train_pos_edges_sum.reshape(-1, 1), train_y)
        fit_neg = linear_model.LinearRegression(fit_intercept=True).fit(train_neg_edges_sum.reshape(-1, 1), train_y)
        fit_pos_neg = linear_model.LinearRegression(fit_intercept=True).fit(np.stack((train_pos_edges_sum, train_neg_edges_sum)).T, train_y)

        # compute train errors
        train_pos_error = pred_summary_function(train_y, fit_pos.predict(train_pos_edges_sum.reshape(-1, 1)))
        train_neg_error = pred_summary_function(train_y, fit_neg.predict(train_neg_edges_sum.reshape(-1, 1)))
        train_posneg_error = pred_summary_function(train_y, fit_pos_neg.predict(np.stack((train_pos_edges_sum, train_neg_edges_sum)).T))

        # compute testing errors
        test_pos_error = pred_summary_function(test_y, fit_pos.predict(test_pos_edges_sum.reshape(-1, 1)))
        test_neg_error = pred_summary_function(test_y, fit_neg.predict(test_neg_edges_sum.reshape(-1, 1)))
        test_posneg_error = pred_summary_function(test_y, fit_pos_neg.predict(np.stack((test_pos_edges_sum, test_neg_edges_sum)).T))

        fold_accs_train.append((train_pos_error, train_neg_error, train_posneg_error))
        fold_accs_test.append((test_pos_error, test_neg_error, test_posneg_error))

        if verbose:
            print(f'CV Fold: {fold_i+1:<10} Train Pos-Edges Model Accuracy: {round(train_pos_error, 3):<10} Train Neg-Edges Model Accuracy: {round(train_neg_error, 3):<10} Train Pos/Neg-Edges Model Accuracy: {round(train_posneg_error, 3):<10}')
            print(f'CV Fold: {fold_i+1:<10} Test  Pos-Edges Model Accuracy: {round(test_pos_error, 3):<10} Test  Neg-Edges Model Accuracy: {round(test_neg_error, 3):<10} Test  Pos/Neg-Edges Model Accuracy: {round(test_posneg_error, 3):<10}')


    plt.figure(figsize=(13, 7))
    plt.plot(range(1, len(fold_accs_train)+1), [x[0] for x in fold_accs_train], linestyle='--', marker='o', color='C0', label='Train Pos-Edges Model')
    plt.plot(range(1, len(fold_accs_train)+1), [x[1] for x in fold_accs_train], linestyle='--', marker='o', color='C1', label='Train Neg-Edges Model')
    plt.plot(range(1, len(fold_accs_train)+1), [x[2] for x in fold_accs_train], linestyle='--', marker='o', color='C2', label='Train Pos/Neg-Edges Model')

    plt.plot(range(1, len(fold_accs_test)+1), [x[0] for x in fold_accs_test], linestyle='-', marker='o', color='C0', label='Test  Pos-Edges Model')
    plt.plot(range(1, len(fold_accs_test)+1), [x[1] for x in fold_accs_test], linestyle='-', marker='o', color='C1', label='Test  Neg-Edges Model')
    plt.plot(range(1, len(fold_accs_test)+1), [x[2] for x in fold_accs_test], linestyle='-', marker='o', color='C2', label='Test  Pos/Neg-Edges Model')

    plt.title(pred_summary_function.__name__, fontsize=20)
    plt.xticks(range(1, len(fold_accs_test)+1))
    plt.xlabel('CV Fold')
    plt.legend(fontsize=10)
    plt.show()

    return fold_accs_train, fold_accs_test
fold_accs_train_z, fold_accs_test_z = cpm(hcp_z, gscores, p_threshold=0.01, n_cv_splits=5, pred_summary_function=metrics.mean_absolute_error, verbose=True)
CV Fold: 1          Computing correlations between train_X and train_y...
CV Fold: 1          Train Pos-Edges Model Accuracy: 0.652      Train Neg-Edges Model Accuracy: 0.673      Train Pos/Neg-Edges Model Accuracy: 0.644
CV Fold: 1          Test  Pos-Edges Model Accuracy: 0.636      Test  Neg-Edges Model Accuracy: 0.671      Test  Pos/Neg-Edges Model Accuracy: 0.632
CV Fold: 2          Computing correlations between train_X and train_y...
CV Fold: 2          Train Pos-Edges Model Accuracy: 0.648      Train Neg-Edges Model Accuracy: 0.678      Train Pos/Neg-Edges Model Accuracy: 0.636
CV Fold: 2          Test  Pos-Edges Model Accuracy: 0.651      Test  Neg-Edges Model Accuracy: 0.659      Test  Pos/Neg-Edges Model Accuracy: 0.662
CV Fold: 3          Computing correlations between train_X and train_y...
CV Fold: 3          Train Pos-Edges Model Accuracy: 0.644      Train Neg-Edges Model Accuracy: 0.662      Train Pos/Neg-Edges Model Accuracy: 0.636
CV Fold: 3          Test  Pos-Edges Model Accuracy: 0.65       Test  Neg-Edges Model Accuracy: 0.708      Test  Pos/Neg-Edges Model Accuracy: 0.646
CV Fold: 4          Computing correlations between train_X and train_y...
CV Fold: 4          Train Pos-Edges Model Accuracy: 0.653      Train Neg-Edges Model Accuracy: 0.676      Train Pos/Neg-Edges Model Accuracy: 0.648
CV Fold: 4          Test  Pos-Edges Model Accuracy: 0.626      Test  Neg-Edges Model Accuracy: 0.659      Test  Pos/Neg-Edges Model Accuracy: 0.625
CV Fold: 5          Computing correlations between train_X and train_y...
CV Fold: 5          Train Pos-Edges Model Accuracy: 0.631      Train Neg-Edges Model Accuracy: 0.666      Train Pos/Neg-Edges Model Accuracy: 0.62
CV Fold: 5          Test  Pos-Edges Model Accuracy: 0.704      Test  Neg-Edges Model Accuracy: 0.696      Test  Pos/Neg-Edges Model Accuracy: 0.697
[Figure: train/test mean_absolute_error across the 5 CV folds for the pos-, neg-, and pos/neg-edges CPM deviations models]
fold_accs_train_ct, fold_accs_test_ct = cpm(hcp_ct, gscores, p_threshold=0.01, n_cv_splits=5, pred_summary_function=metrics.mean_absolute_error, verbose=True)
CV Fold: 1          Computing correlations between train_X and train_y...
CV Fold: 1          Train Pos-Edges Model Accuracy: 0.675      Train Neg-Edges Model Accuracy: 0.673      Train Pos/Neg-Edges Model Accuracy: 0.659
CV Fold: 1          Test  Pos-Edges Model Accuracy: 0.659      Test  Neg-Edges Model Accuracy: 0.67       Test  Pos/Neg-Edges Model Accuracy: 0.653
CV Fold: 2          Computing correlations between train_X and train_y...
CV Fold: 2          Train Pos-Edges Model Accuracy: 0.674      Train Neg-Edges Model Accuracy: 0.678      Train Pos/Neg-Edges Model Accuracy: 0.636
CV Fold: 2          Test  Pos-Edges Model Accuracy: 0.661      Test  Neg-Edges Model Accuracy: 0.657      Test  Pos/Neg-Edges Model Accuracy: 0.668
CV Fold: 3          Computing correlations between train_X and train_y...
CV Fold: 3          Train Pos-Edges Model Accuracy: 0.659      Train Neg-Edges Model Accuracy: 0.665      Train Pos/Neg-Edges Model Accuracy: 0.644
CV Fold: 3          Test  Pos-Edges Model Accuracy: 0.699      Test  Neg-Edges Model Accuracy: 0.704      Test  Pos/Neg-Edges Model Accuracy: 0.684
CV Fold: 4          Computing correlations between train_X and train_y...
CV Fold: 4          Train Pos-Edges Model Accuracy: 0.674      Train Neg-Edges Model Accuracy: 0.678      Train Pos/Neg-Edges Model Accuracy: 0.658
CV Fold: 4          Test  Pos-Edges Model Accuracy: 0.653      Test  Neg-Edges Model Accuracy: 0.656      Test  Pos/Neg-Edges Model Accuracy: 0.638
CV Fold: 5          Computing correlations between train_X and train_y...
CV Fold: 5          Train Pos-Edges Model Accuracy: 0.662      Train Neg-Edges Model Accuracy: 0.666      Train Pos/Neg-Edges Model Accuracy: 0.642
CV Fold: 5          Test  Pos-Edges Model Accuracy: 0.709      Test  Neg-Edges Model Accuracy: 0.698      Test  Pos/Neg-Edges Model Accuracy: 0.708
[Figure: train/test mean_absolute_error across the 5 CV folds for the pos-, neg-, and pos/neg-edges CPM cortical thickness models]

Lasso (Linear Regression + L1 Regularization)

# LassoCV fits coordinate-descent paths over the alpha grid and selects alpha by 5-fold cross-validation
alpha_grid = np.array([10**a for a in np.arange(-3, 3, 0.25)])
lassoCV_model_z = linear_model.LassoCV(cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, fit_intercept=True, random_state=42, verbose=True, n_jobs=5).fit(train_data_z, train_phen)
[Parallel(n_jobs=5)]: Using backend ThreadingBackend with 5 concurrent workers.
[... repeated sklearn ConvergenceWarning: "Objective did not converge. You might want to increase the number of iterations." ...]
[Parallel(n_jobs=5)]: Done   5 out of   5 | elapsed:    0.5s finished
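
The same grid can be written more directly with np.logspace (an equivalent sketch: 24 points from 10^-3 to 10^2.75 in quarter-decade steps):

# identical alpha grid via logspace; step = (2.75 - (-3)) / 23 = 0.25 decades
alpha_grid_alt = np.logspace(-3, 2.75, num=24)
assert np.allclose(alpha_grid_alt, alpha_grid)
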
# LassoCV fits coordinate-descent paths over the alpha grid and selects alpha by 5-fold cross-validation
alpha_grid = np.array([10**a for a in np.arange(-3, 3, 0.25)])
lassoCV_model_ct = linear_model.LassoCV(cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, fit_intercept=True,  random_state=42, verbose=True, n_jobs=5).fit(train_data_ct, train_phen)
[Parallel(n_jobs=5)]: Using backend ThreadingBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done   5 out of   5 | elapsed:    0.3s finished
plt.figure(figsize=(10, 7))
plt.plot(lassoCV_model_z.alphas_, lassoCV_model_z.mse_path_, ':')
plt.plot(lassoCV_model_z.alphas_, lassoCV_model_z.mse_path_.mean(axis=-1), color='k', marker='o', label='Mean MSE Across Folds Z model', linewidth=2)
plt.axvline(x=100, linestyle='--', c='r')
plt.xlabel('Alpha')
plt.ylabel('MSE')
plt.legend()
plt.show()
[Figure: alpha vs. per-fold and mean MSE, LassoCV deviations model]
plt.figure(figsize=(10, 7))
plt.plot(lassoCV_model_ct.alphas_, lassoCV_model_ct.mse_path_, ':')
plt.plot(lassoCV_model_ct.alphas_, lassoCV_model_ct.mse_path_.mean(axis=-1), color='k', marker='o', label='Mean MSE Across Folds CT model', linewidth=2)
plt.axvline(x=100, linestyle='--', c='r')
plt.xlabel('Alpha')
plt.ylabel('MSE')
plt.legend()
plt.show()
[Figure: alpha vs. per-fold and mean MSE, LassoCV cortical thickness model]
# refit the Lasso using the CV-selected alpha
lasso_model_z = linear_model.Lasso(alpha=lassoCV_model_z.alpha_, fit_intercept=True).fit(train_data_z, train_phen)
# refit the Lasso using the CV-selected alpha
lasso_model_ct = linear_model.Lasso(alpha=lassoCV_model_ct.alpha_, fit_intercept=True).fit(train_data_ct, train_phen)
train_preds_lasso_model_z = lasso_model_z.predict(train_data_z)
test_preds_lasso_model_z = lasso_model_z.predict(test_data_z)

train_mae_z = metrics.mean_absolute_error(train_phen, train_preds_lasso_model_z)
test_mae_z = metrics.mean_absolute_error(test_phen, test_preds_lasso_model_z)

train_preds_lasso_model_ct = lasso_model_ct.predict(train_data_ct)
test_preds_lasso_model_ct = lasso_model_ct.predict(test_data_ct)

train_mae_ct = metrics.mean_absolute_error(train_phen, train_preds_lasso_model_ct)
test_mae_ct = metrics.mean_absolute_error(test_phen, test_preds_lasso_model_ct)

print(f'Train MAE Z model: {train_mae_z:.3f}')
print(f'Test MAE Z model: {test_mae_z:.3f}')
print(f'Train MAE CT model: {train_mae_ct:.3f}')
print(f'Test MAE CT model: {test_mae_ct:.3f}')
Train MAE Z model: 0.620
Test MAE Z model: 0.682
Train MAE CT model: 0.650
Test MAE CT model: 0.697

Ridge (Linear Regression + L2 Regularization)

# RidgeCV selects the hyperparameter alpha by cross-validation (5-fold here)
with warnings.catch_warnings():
    # ignore matrix decomposition warnings
    warnings.simplefilter("ignore")
    ridgeCV_model_z = linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0), fit_intercept=True, cv=5).fit(train_data_z, train_phen)
# RidgeCV selects the hyperparameter alpha by cross-validation (5-fold here)
with warnings.catch_warnings():
    # ignore matrix decomposition warnings
    warnings.simplefilter("ignore")
    ridgeCV_model_ct = linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0), fit_intercept=True, cv=5).fit(train_data_ct, train_phen)
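
For comparison, leaving cv unset makes RidgeCV use its efficient closed-form leave-one-out (generalized) cross-validation rather than 5-fold CV (a minimal sketch):

# cv=None (the default) uses efficient leave-one-out generalized cross-validation
ridgeCV_gcv_z = linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0), fit_intercept=True).fit(train_data_z, train_phen)
print(f'LOO-GCV selected alpha = {ridgeCV_gcv_z.alpha_}')
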
ridge_alpha_z = ridgeCV_model_z.alpha_
print(f'CV Selected Alpha Z model = {ridge_alpha_z:.3f}')
CV Selected Alpha Z model = 10.000
ridge_alpha_ct = ridgeCV_model_ct.alpha_
print(f'CV Selected Alpha CT model = {ridge_alpha_ct:.3f}')
CV Selected Alpha CT model = 10.000
ridge_model_z = linear_model.Ridge(alpha=ridge_alpha_z, fit_intercept=True).fit(train_data_z, train_phen)
ridge_model_ct = linear_model.Ridge(alpha=ridge_alpha_ct, fit_intercept=True).fit(train_data_ct, train_phen)
train_preds_ridge_model_z = ridge_model_z.predict(train_data_z)
test_preds_ridge_model_z = ridge_model_z.predict(test_data_z)

train_mae_z = metrics.mean_absolute_error(train_phen, train_preds_ridge_model_z)
test_mae_z = metrics.mean_absolute_error(test_phen, test_preds_ridge_model_z)

train_preds_ridge_model_ct = ridge_model_ct.predict(train_data_ct)
test_preds_ridge_model_ct = ridge_model_ct.predict(test_data_ct)

train_mae_ct = metrics.mean_absolute_error(train_phen, train_preds_ridge_model_ct)
test_mae_ct = metrics.mean_absolute_error(test_phen, test_preds_ridge_model_ct)

print(f'Train MAE Z model: {train_mae_z:.3f}')
print(f'Test MAE Z model: {test_mae_z:.3f}')
print(f'Train MAE CT model: {train_mae_ct:.3f}')
print(f'Test MAE CT model: {test_mae_ct:.3f}')
Train MAE Z model: 0.527
Test MAE Z model: 0.734
Train MAE CT model: 0.600
Test MAE CT model: 0.692

Elastic Net (Linear Regression + L1/L2 Regularization)

# ElasticNetCV selects the hyperparameter alpha and the l1_ratio mixing parameter by cross-validation
elasticnetCV_model_z = linear_model.ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, random_state=42, verbose=True, n_jobs=5).fit(train_data_z, train_phen)
[Parallel(n_jobs=5)]: Using backend ThreadingBackend with 5 concurrent workers.
[... repeated sklearn ConvergenceWarning: "Objective did not converge. You might want to increase the number of iterations." ...]
[Parallel(n_jobs=5)]: Done  35 out of  35 | elapsed:    6.3s finished
# ElasticNetCV selects the hyperparameter alpha and the l1_ratio mixing parameter by cross-validation
elasticnetCV_model_ct = linear_model.ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, random_state=42, verbose=True, n_jobs=5).fit(train_data_ct, train_phen)
[Parallel(n_jobs=5)]: Using backend ThreadingBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  35 out of  35 | elapsed:    1.8s finished
print(f'CV selected alpha Z model {elasticnetCV_model_z.alpha_:.3f}')
print(f'Elastic net L1 ratio Z model {elasticnetCV_model_z.l1_ratio_:.3f}')
print(f'CV selected alpha CT model {elasticnetCV_model_ct.alpha_:.3f}')
print(f'Elastic net L1 ratio CT model {elasticnetCV_model_ct.l1_ratio_:.3f}')
CV selected alpha Z model 0.056
Elastic net L1 ratio Z model 0.700
CV selected alpha CT model 0.032
Elastic net L1 ratio CT model 0.100
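
For reference, a sketch of the objective that ElasticNetCV tunes, following scikit-learn's parameterization; l1_ratio interpolates between a pure L2 penalty (0) and the Lasso (1):

def elastic_net_objective(w, X, y, alpha, l1_ratio):
    # sklearn's ElasticNet loss (intercept omitted for simplicity):
    # squared error plus mixed L1/L2 penalty
    n_samples = X.shape[0]
    resid = y - X @ w
    return (resid @ resid) / (2 * n_samples) \
        + alpha * l1_ratio * np.abs(w).sum() \
        + 0.5 * alpha * (1 - l1_ratio) * (w @ w)
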
plt.figure(figsize=(10, 7))
# mse_path_ has shape (n_l1_ratio, n_alphas, n_folds); index 1 selects l1_ratio=0.5
plt.plot(elasticnetCV_model_z.alphas_, elasticnetCV_model_z.mse_path_[1, :, :], ':')
plt.plot(elasticnetCV_model_z.alphas_, elasticnetCV_model_z.mse_path_[1, :, :].mean(axis=-1), color='k', marker='o', label='Mean MSE Across Folds', linewidth=2)
plt.axvline(x=200, linestyle='--', c='r')
plt.title('Alpha vs. MSE Z model')
plt.xlabel('Alpha')
plt.ylabel('MSE')
plt.legend()
plt.show()
[Figure: alpha vs. MSE Z model, per-fold and mean MSE at l1_ratio=0.5]
plt.figure(figsize=(10, 7))
plt.plot(elasticnetCV_model_ct.alphas_, elasticnetCV_model_ct.mse_path_[1, :, :], ':')
plt.plot(elasticnetCV_model_ct.alphas_, elasticnetCV_model_ct.mse_path_[1, :, :].mean(axis=-1), color='k', marker='o', label='Mean MSE Across Folds', linewidth=2)
plt.axvline(x=200, linestyle='--', c='r')
plt.title('Alpha vs. MSE CT model')
plt.xlabel('Alpha')
plt.ylabel('MSE')
plt.legend()
plt.show()
[Figure: alpha vs. MSE CT model, per-fold and mean MSE at l1_ratio=0.5]
elasticnet_model_z = linear_model.ElasticNet(alpha=elasticnetCV_model_z.alpha_, l1_ratio=elasticnetCV_model_z.l1_ratio_, fit_intercept=True,  random_state=42).fit(train_data_z, train_phen)

train_preds_en_model_z = elasticnet_model_z.predict(train_data_z)
test_preds_en_model_z = elasticnet_model_z.predict(test_data_z)

train_mae_z = metrics.mean_absolute_error(train_phen, train_preds_en_model_z)
test_mae_z = metrics.mean_absolute_error(test_phen, test_preds_en_model_z)

elasticnet_model_ct = linear_model.ElasticNet(alpha=elasticnetCV_model_ct.alpha_, l1_ratio=elasticnetCV_model_ct.l1_ratio_, fit_intercept=True, random_state=42).fit(train_data_ct, train_phen)

train_preds_en_model_ct = elasticnet_model_ct.predict(train_data_ct)
test_preds_en_model_ct = elasticnet_model_ct.predict(test_data_ct)

train_mae_ct = metrics.mean_absolute_error(train_phen, train_preds_en_model_ct)
test_mae_ct = metrics.mean_absolute_error(test_phen, test_preds_en_model_ct)

print(f'Train MAE Z model: {train_mae_z:.3f}')
print(f'Test MAE Z model: {test_mae_z:.3f}')
print(f'Train MAE CT model: {train_mae_ct:.3f}')
print(f'Test MAE CT model: {test_mae_ct:.3f}')
Train MAE Z model: 0.611
Test MAE Z model: 0.680
Train MAE CT model: 0.633
Test MAE CT model: 0.692