# Import gusto, gustoB, and sample4 data setsgusto <-read.csv("data/gusto1.csv") # GUSTO sample with 40830 patientsgustoB <-read.csv("data/gustoB.csv") # GUSTO part B sample with 20318 patientsgustos <-read.csv("data/sample4.csv") # GUSTO sample4 with 785 patientssource("R/auc.nonpara.mw.R")source("R/ci.auc.R")source("R/val.prob.ci.2.R")# levels(gustos$HRT) <- c("No tachycardia", "Tachycardia")dd <-datadist(gustos)options(datadist ="dd")
Evaluate interactions with age in a full model, which includes 8 predictors in total. The data set is small (sample4, n=785, 52 events)
Code show/hide
### Full model and age interactionsfull <-lrm(DAY30 ~ AGE + KILLIP + HIG + DIA + HYP + HRT + TTR + SEX, data = gustos, x = T, y = T, linear.predictors = F)fullint <-lrm(DAY30 ~ AGE * (KILLIP + HIG + DIA + HYP + HRT + TTR + SEX), data = gustos, x = T, y = T, linear.predictors = F)anova(fullint)
Wald Statistics Response: DAY30
Factor Chi-Square d.f. P
AGE (Factor+Higher Order Factors) 29.93 8 0.0002
All Interactions 11.01 7 0.1380
KILLIP (Factor+Higher Order Factors) 4.96 2 0.0839
All Interactions 1.01 1 0.3160
HIG (Factor+Higher Order Factors) 6.32 2 0.0424
All Interactions 2.20 1 0.1380
DIA (Factor+Higher Order Factors) 3.70 2 0.1571
All Interactions 0.64 1 0.4220
HYP (Factor+Higher Order Factors) 5.62 2 0.0603
All Interactions 1.46 1 0.2267
HRT (Factor+Higher Order Factors) 13.50 2 0.0012
All Interactions 4.49 1 0.0341
TTR (Factor+Higher Order Factors) 8.31 2 0.0157
All Interactions 2.80 1 0.0942
SEX (Factor+Higher Order Factors) 0.79 2 0.6736
All Interactions 0.42 1 0.5151
AGE * KILLIP (Factor+Higher Order Factors) 1.01 1 0.3160
AGE * HIG (Factor+Higher Order Factors) 2.20 1 0.1380
AGE * DIA (Factor+Higher Order Factors) 0.64 1 0.4220
AGE * HYP (Factor+Higher Order Factors) 1.46 1 0.2267
AGE * HRT (Factor+Higher Order Factors) 4.49 1 0.0341
AGE * TTR (Factor+Higher Order Factors) 2.80 1 0.0942
AGE * SEX (Factor+Higher Order Factors) 0.42 1 0.5151
TOTAL INTERACTION 11.01 7 0.1380
TOTAL 64.59 15 <.0001
Code show/hide
### Select only interaction AGE * HRTfullints <-lrm(DAY30 ~ AGE + KILLIP + HIG + DIA + HYP + HRT + TTR + SEX + AGE * HRT, data = gustos, x = T, y = T, linear.predictors = T)anova(fullints)
Wald Statistics Response: DAY30
Factor Chi-Square d.f. P
AGE (Factor+Higher Order Factors) 26.67 2 <.0001
All Interactions 2.70 1 0.1004
KILLIP 3.52 1 0.0605
HIG 5.38 1 0.0204
DIA 3.14 1 0.0763
HYP 3.49 1 0.0618
HRT (Factor+Higher Order Factors) 11.67 2 0.0029
All Interactions 2.70 1 0.1004
TTR 5.66 1 0.0174
SEX 0.19 1 0.6602
AGE * HRT (Factor+Higher Order Factors) 2.70 1 0.1004
TOTAL 65.30 9 <.0001
Fig 12.1
Make 2 plots with linear interaction, in the small n=785 sample, and in the full n=40830 sample
Fig 12.2
Make 4 plots with main effects, linear interaction, and 2 variants of interaction only above age 55. The variable is (Age-55)[+].
In the last graph, the green dotted line follows the angle from below age 55 years (only 1 Age effect is estimated for the HRT==0 and HRT==1 and age<55 patients). In the pre-final graph, there are 2 separate angles from age 55 for HRT==0 and HRT==1 (barely noticable for the green dotted line).
Smart coding illustration
Smart coding of age effect: separate for no HRT (HRT==0) and for HRT (HRT==1)
Code show/hide
# Smart coding of age effect: separate for no HRT (HRT==0) and for HRT (HRT==1)gustos$AGE0 <- gustos$AGE * (1- gustos$HRT)gustos$AGE1 <- gustos$AGE * gustos$HRT# Standardlrm(DAY30 ~ AGE + KILLIP + HIG + DIA + HYP + HRT + TTR + SEX + AGE * HRT, data = gustos, x = T, y = T, linear.predictors = F)
Logistic Regression Model
lrm(formula = DAY30 ~ AGE + KILLIP + HIG + DIA + HYP + HRT +
TTR + SEX + AGE * HRT, data = gustos, x = T, y = T, linear.predictors = F)
Model Likelihood Discrimination Rank Discrim.
Ratio Test Indexes Indexes
Obs 785 LR chi2 86.28 R2 0.270 C 0.831
0 733 d.f. 9 R2(9,785)0.094 Dxy 0.662
1 52 Pr(> chi2) <0.0001 R2(9,145.7)0.412 gamma 0.662
max |deriv| 1e-09 Brier 0.051 tau-a 0.082
Coef S.E. Wald Z Pr(>|Z|)
Intercept -9.7431 1.6574 -5.88 <0.0001
AGE 0.0759 0.0240 3.16 0.0016
KILLIP 0.4595 0.2448 1.88 0.0605
HIG 0.7900 0.3407 2.32 0.0204
DIA 0.7804 0.4403 1.77 0.0763
HYP 1.0403 0.5570 1.87 0.0618
HRT -3.6376 2.8352 -1.28 0.1995
TTR 0.8201 0.3447 2.38 0.0174
SEX -0.1534 0.3490 -0.44 0.6602
AGE * HRT 0.0655 0.0399 1.64 0.1004
Code show/hide
# Smart codinglrm(DAY30 ~ AGE0 + AGE1 + HRT + KILLIP + HIG + DIA + HYP + TTR + SEX, data = gustos, x = T, y = T, linear.predictors = F)
Logistic Regression Model
lrm(formula = DAY30 ~ AGE0 + AGE1 + HRT + KILLIP + HIG + DIA +
HYP + TTR + SEX, data = gustos, x = T, y = T, linear.predictors = F)
Model Likelihood Discrimination Rank Discrim.
Ratio Test Indexes Indexes
Obs 785 LR chi2 86.28 R2 0.270 C 0.831
0 733 d.f. 9 R2(9,785)0.094 Dxy 0.662
1 52 Pr(> chi2) <0.0001 R2(9,145.7)0.412 gamma 0.662
max |deriv| 1e-09 Brier 0.051 tau-a 0.082
Coef S.E. Wald Z Pr(>|Z|)
Intercept -9.7431 1.6574 -5.88 <0.0001
AGE0 0.0759 0.0240 3.16 0.0016
AGE1 0.1414 0.0332 4.26 <0.0001
HRT -3.6376 2.8352 -1.28 0.1995
KILLIP 0.4595 0.2448 1.88 0.0605
HIG 0.7900 0.3407 2.32 0.0204
DIA 0.7804 0.4403 1.77 0.0763
HYP 1.0403 0.5570 1.87 0.0618
TTR 0.8201 0.3447 2.38 0.0174
SEX -0.1534 0.3490 -0.44 0.6602
Code show/hide
# Identical fit, easier interpretation# Age 55 as reference for HRT effectgustos$AGE0 <- (gustos$AGE -55) * (1- gustos$HRT)gustos$AGE1 <- (gustos$AGE -55) * gustos$HRTlrm(DAY30 ~ AGE0 + AGE1 + HRT + KILLIP + HIG + DIA + HYP + TTR + SEX, data = gustos, x = T, y = T, linear.predictors = F)
Logistic Regression Model
lrm(formula = DAY30 ~ AGE0 + AGE1 + HRT + KILLIP + HIG + DIA +
HYP + TTR + SEX, data = gustos, x = T, y = T, linear.predictors = F)
Model Likelihood Discrimination Rank Discrim.
Ratio Test Indexes Indexes
Obs 785 LR chi2 86.28 R2 0.270 C 0.831
0 733 d.f. 9 R2(9,785)0.094 Dxy 0.662
1 52 Pr(> chi2) <0.0001 R2(9,145.7)0.412 gamma 0.662
max |deriv| 4e-11 Brier 0.051 tau-a 0.082
Coef S.E. Wald Z Pr(>|Z|)
Intercept -5.5703 0.5746 -9.69 <0.0001
AGE0 0.0759 0.0240 3.16 0.0016
AGE1 0.1414 0.0332 4.26 <0.0001
HRT -0.0333 0.7043 -0.05 0.9623
KILLIP 0.4595 0.2448 1.88 0.0605
HIG 0.7900 0.3407 2.32 0.0204
DIA 0.7804 0.4403 1.77 0.0763
HYP 1.0403 0.5570 1.87 0.0618
TTR 0.8201 0.3447 2.38 0.0174
SEX -0.1534 0.3490 -0.44 0.6602
Code show/hide
# Even nicer interpretation, HRT effect for age=55
The fit of each of the models is identical (always LR chi2=86.28); each model allows for linear interaction between AGE and HRT. The interpretation of the model is easier if the age effects are estimated for HRT==1 and for HRT==0. Scaling is easier by subtracting 55 from AGE (AGE-55); this implies the HRT effect relates to age 55.
Table 12.2 Better predictions?
Assess the performance if the models created in n=785 in an independent validation part, GustoB, n=20318.
Plots created with a modification of Frank Harrell’s val.prob() function:
Code show/hide
# Validate in independent part, named gustoB# main effectslrm.val.full <-predict(full, newdata = gustoB, type ="lp")# simple interactionlrm.val.int1 <-predict(fullints, newdata = gustoB, type ="lp")# Plotval.prob.ci.2(y = gustoB[, "DAY30"], logit = lrm.val.full, riskdist ="predicted", logistic.cal = F,smooth ="rcs", nr.knots =3, g =8, xlim =c(0, .5), ylim =c(0, .5),legendloc =c(0.18, 0.15), statloc =c(0, .4), roundstats =3,xlab ="Predicted probability from n=785", ylab ="Observed proportion in n=20318")
Conclusions Discrimination: worse with interactions than without. Calibration: We note some overfitting, as expected by a fit in a small sample.