# principles and practice of sem, 5th ed.
# rex b. kline, guilford press, 2023

# chapter 8, table 8.1, analysis 3a
# local estimations of causal effects in the roth et al. (1989)
# parametric path model of illness through adjustment sets (OLS)

# to avoid the problem that some R packages share the same name 
# for different functions, all functions are specified next as 

# package::function

# which prevents masking, or the default hiding of 
# a function with a redundant name from a package used next

date()

v <- R.Version()
print(paste0(v$language, " version ", v$major, ".",
 v$minor, " (", v$year, "-", v$month, "-", v$day, ")"))

library(piecewiseSEM)
library(semTools)
library(lavaan)

# packages semTools and lavaan are used to generate raw
# scores for analysis in package piecewiseSEM

# get citation information
citation("piecewiseSEM", auto = TRUE)
citation("semTools", auto = TRUE)
citation("lavaan", auto = TRUE)

# generate raw scores with descriptive statistics that 
# exactly match those from the summarey data in table 4.3
# reuses syntax from analysis 1 ("roth-generate-scores.r") to
# avoid reading in an external raw data file
# functions from semTools and lavaan

roth.cor <- matrix(c(1.00, -.03,  .39, -.05, -.08,
                     -.03, 1.00,  .07, -.23, -.16,
                      .39,  .07, 1.00, -.13, -.29,
                     -.05, -.23, -.13, 1.00,  .34,
                     -.08, -.16, -.29,  .34, 1.00), ncol = 5, nrow = 5)
roth.sd <- c(66.5, 38.0, 18.4, 33.5, 62.48)
roth.cov <- diag(roth.sd) %*% roth.cor %*% diag(roth.sd)
c = 372/373
roth.cov = c * roth.cov
roth.data <- semTools::kd(roth.cov, 373, type = "exact")
names(roth.data) <- c("exercise", "hardy", "fitness", "stress", "illness")
roth.data <- transform(roth.data, exercise = exercise + 40.9)
roth.data <- transform(roth.data, fitness = fitness + 67.1)
roth.data <- transform(roth.data, stress = stress + 24.0)
roth.data <- transform(roth.data, illness = illness + 71.67)

# turn off scientific notation for printing of 
# standardized coefficients
# options(scipen=999)

# OLS (ordinary least squares) with adjustment sets
# unstandardized estimates only

# direct effects

# exercise -> fitness, no adjust
ef <- lm(fitness ~ exercise, data = roth.data)
coef(ef)
coef(summary(ef))[, "Std. Error"]
confint(ef)

# hardy -> stress, no adjust
hs <- lm(stress ~ hardy, data = roth.data)
coef(hs)
coef(summary(hs))[, "Std. Error"]
confint(hs)

# fitness -> illness, adjust = stress
fi_s <- lm(illness ~ fitness + stress, data = roth.data)
coef(fi_s)
coef(summary(fi_s))[, "Std. Error"]
confint(fi_s)

# fitness -> illness, adjust = exercise
fi_e <- lm(illness ~ fitness + exercise, data = roth.data)
coef(fi_e)
coef(summary(fi_e))[, "Std. Error"]
confint(fi_e)

# fitness -> illness, adjust = hardy
fi_h <- lm(illness ~ fitness + hardy, data = roth.data)
coef(fi_h)
coef(summary(fi_h))[, "Std. Error"]
confint(fi_h)

# stress -> illness, adjust = fitness
si_f <- lm(illness ~ stress + fitness, data = roth.data)
coef(si_f)
coef(summary(si_f))[, "Std. Error"]
confint(si_f)

# stress -> illness, adjust = exercise
si_e <- lm(illness ~ stress + exercise, data = roth.data)
coef(si_e)
coef(summary(si_e))[, "Std. Error"]
confint(si_e)

# stress -> illness, adjust = hardy
si_h <- lm(illness ~ stress + hardy, data = roth.data)
coef(si_h)
coef(summary(si_h))[, "Std. Error"]
confint(si_h)

# indirect effects estimated as total effects

# turn off scientific notation for standardized 
# estimates
options(scipen=999)

# exercise -> fitness -> illness, adjust = hardy
efi_h <- lm(illness ~ exercise + hardy, data = roth.data)
coef(efi_h)
coef(summary(efi_h))[, "Std. Error"]
confint(efi_h)
efi_hs <- lm(scale(illness) ~ scale(exercise) + scale(hardy),
 data = roth.data)
coef(summary(efi_hs))

# exercise -> fitness -> illness, adjust = stress
efi_s <- lm(illness ~ exercise + stress, data = roth.data)
coef(efi_s)
coef(summary(efi_s))[, "Std. Error"]
confint(efi_s)
efi_ss <- lm(scale(illness) ~ scale(exercise) + scale(stress),
 data = roth.data)
coef(summary(efi_ss))


# hardy -> stress -> illness, adjust = exercise
hsi_e <- lm(illness ~ hardy + exercise, data = roth.data)
coef(hsi_e)
coef(summary(hsi_e))[, "Std. Error"]
confint(hsi_e)
hsi_es <- lm(scale(illness) ~ scale(hardy) + scale(exercise),
 data = roth.data)
coef(summary(hsi_es))

# hardy -> stress -illness, adjust = fitness
hsi_f <- lm(illness ~ hardy + fitness, data = roth.data)
coef(hsi_f)
coef(summary(hsi_f))[, "Std. Error"]
confint(hsi_f)
hsi_fs <- lm(scale(illness) ~ scale(hardy) + scale(fitness),
 data = roth.data)
coef(summary(hsi_fs))

# estimate direct effects controlling for all
# parents of each outcome

# define DAG in package piecwiseSEM
# exercise and hardy covary (%~~%)

roth.model <- piecewiseSEM::psem(
 lm(illness ~ stress + fitness, data = roth.data),
 lm(fitness ~ exercise, data = roth.data),
 lm(stress ~ hardy, data = roth.data),
 exercise %~~% hardy,
 data = roth.data
)

out <- summary(roth.model, standardize, .progressBar = FALSE)
out$coefficients
out$R2

# get greater precision for R-squared values
summary(lm(illness ~ fitness + stress, data = roth.data))$r.squared
summary(lm(fitness ~ exercise, data = roth.data))$r.squared
summary(lm(stress ~ hardy, data = roth.data))$r.squared