library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stevemisc)
##
## Attaching package: 'stevemisc'
## The following object is masked from 'package:dplyr':
##
## tbl_df
Abortion <- readRDS(url("http://posc3410.svmiller.com/toy-data/wvs-usa-abortion.rds"))
# Install necessary packages first (if you don't have them).
# I tried to keep this as minimal as possible, but you'll need some additional packages.
# That's my bad. Sorry.
list.of.packages <- c("broom","arm","lme4","modelr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
# Of note: modelr will be the only one I ask you to directly load right now
# broom and arm will be loaded quietly
library(modelr)
# First, let's get a histogram to see what we're dealing with here...
Abortion %>%
select(aj) %>% na.omit %>%
group_by(aj) %>%
tally() %>%
ggplot(.,aes(as.factor(aj), n)) + geom_bar(stat="identity") +
geom_text(aes(label=n), vjust = -.5) +
labs(x = "Justifiability of Abortion",
y = "Number of Observations in Each Unique Response Type",
caption = "Data: World Values Survey, 1981-2011")
# Let's go ahead and treat it as interval...
M1 <- lm(aj ~ age + I(age^2) + female + ideology + satisfinancial + cai + godimportant, data=Abortion)
summary(M1)
##
## Call:
## lm(formula = aj ~ age + I(age^2) + female + ideology + satisfinancial +
## cai + godimportant, data = Abortion)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3235 -2.0804 -0.2974 1.7179 8.2837
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.119e+00 2.204e-01 32.303 < 2e-16 ***
## age 3.969e-02 8.248e-03 4.812 1.52e-06 ***
## I(age^2) -3.861e-04 8.439e-05 -4.575 4.82e-06 ***
## female 2.352e-01 5.406e-02 4.351 1.37e-05 ***
## ideology -2.237e-01 1.420e-02 -15.754 < 2e-16 ***
## satisfinancial 5.192e-03 1.131e-02 0.459 0.646
## cai 4.755e-01 2.540e-02 18.722 < 2e-16 ***
## godimportant -3.266e-01 1.113e-02 -29.333 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.57 on 9250 degrees of freedom
## (1129 observations deleted due to missingness)
## Multiple R-squared: 0.2123, Adjusted R-squared: 0.2117
## F-statistic: 356.1 on 7 and 9250 DF, p-value: < 2.2e-16
# I like broom::tidy summaries better than straight up summary()
broom::tidy(M1)
## # A tibble: 8 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 7.12 0.220 32.3 5.26e-217
## 2 age 0.0397 0.00825 4.81 1.52e- 6
## 3 I(age^2) -0.000386 0.0000844 -4.58 4.82e- 6
## 4 female 0.235 0.0541 4.35 1.37e- 5
## 5 ideology -0.224 0.0142 -15.8 3.34e- 55
## 6 satisfinancial 0.00519 0.0113 0.459 6.46e- 1
## 7 cai 0.475 0.0254 18.7 8.50e- 77
## 8 godimportant -0.327 0.0111 -29.3 6.34e-181
# Let's standardize the coefficients now...
Abortion %>%
mutate_at(vars("age", "ideology", "satisfinancial",
"cai", "godimportant"), list(z = ~r2sd(.))) %>%
rename_at(vars(contains("_z")),
~paste("z", gsub("_z", "", .), sep = "_") ) -> Abortion
# Let's get Model 2
M2 <- lm(aj ~ z_age + I(z_age^2) + female + z_ideology + z_satisfinancial + z_cai + z_godimportant, data=Abortion)
summary(M2)
##
## Call:
## lm(formula = aj ~ z_age + I(z_age^2) + female + z_ideology +
## z_satisfinancial + z_cai + z_godimportant, data = Abortion)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3235 -2.0804 -0.2974 1.7179 8.2837
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.20115 0.04681 89.748 < 2e-16 ***
## z_age 0.15665 0.05697 2.749 0.00598 **
## I(z_age^2) -0.49830 0.10891 -4.575 4.82e-06 ***
## female 0.23522 0.05406 4.351 1.37e-05 ***
## z_ideology -0.87418 0.05549 -15.754 < 2e-16 ***
## z_satisfinancial 0.02544 0.05541 0.459 0.64612
## z_cai 1.07173 0.05724 18.722 < 2e-16 ***
## z_godimportant -1.71138 0.05834 -29.333 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.57 on 9250 degrees of freedom
## (1129 observations deleted due to missingness)
## Multiple R-squared: 0.2123, Adjusted R-squared: 0.2117
## F-statistic: 356.1 on 7 and 9250 DF, p-value: < 2.2e-16
broom::tidy(M2)
## # A tibble: 8 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 4.20 0.0468 89.7 0
## 2 z_age 0.157 0.0570 2.75 5.98e- 3
## 3 I(z_age^2) -0.498 0.109 -4.58 4.82e- 6
## 4 female 0.235 0.0541 4.35 1.37e- 5
## 5 z_ideology -0.874 0.0555 -15.8 3.34e- 55
## 6 z_satisfinancial 0.0254 0.0554 0.459 6.46e- 1
## 7 z_cai 1.07 0.0572 18.7 8.50e- 77
## 8 z_godimportant -1.71 0.0583 -29.3 6.34e-181
# Let's keep it easy and look at the effect of age with not standardizing it.
# To be clear: quantities of interest are going to be a case of six-to-one when standardizing or not standardizing it
# However, regression coefficients *will* change. Quantities of interest like these (simulated values of y) *won't*
# But, this will be a little more straightforward for students in seeing the underlying code. So let's roll with it.
M3 <- lm(aj ~ age + I(age^2) + female + z_ideology + z_satisfinancial + z_cai + z_godimportant, data=Abortion)
summary(M3)
##
## Call:
## lm(formula = aj ~ age + I(age^2) + female + z_ideology + z_satisfinancial +
## z_cai + z_godimportant, data = Abortion)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3235 -2.0804 -0.2974 1.7179 8.2837
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.193e+00 1.860e-01 17.168 < 2e-16 ***
## age 3.969e-02 8.248e-03 4.812 1.52e-06 ***
## I(age^2) -3.861e-04 8.439e-05 -4.575 4.82e-06 ***
## female 2.352e-01 5.406e-02 4.351 1.37e-05 ***
## z_ideology -8.742e-01 5.549e-02 -15.754 < 2e-16 ***
## z_satisfinancial 2.544e-02 5.541e-02 0.459 0.646
## z_cai 1.072e+00 5.724e-02 18.722 < 2e-16 ***
## z_godimportant -1.711e+00 5.834e-02 -29.333 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.57 on 9250 degrees of freedom
## (1129 observations deleted due to missingness)
## Multiple R-squared: 0.2123, Adjusted R-squared: 0.2117
## F-statistic: 356.1 on 7 and 9250 DF, p-value: < 2.2e-16
# Let's create a hypothetical row of new data
# Herein, the respondent is a typical woman of average (technically: median) ideology, financial situation, authoritarian views, and religiosity.
# The only thing that varies is age, which goes from the minimum (17) to maximum (96).
Abortion %>%
data_grid(.model = M3, age=seq(min(age, na.rm=T), max(age, na.rm=T), by =1),
aj = 0) -> newdatM3
# Let's get some simulations from the multivariate normal:
simsM3 <- get_sims(M3, newdatM3, 1000, 8675309)
# Let's add in some identifiers so we better know what we're looking at:
newdatM3 %>%
dplyr::slice(rep(row_number(), 1000)) %>%
bind_cols(simsM3, .) -> simsM3
# And let's plot it...
# Grouping by 1000 simulations for each age, let's get the median and 95% bounds around it.
simsM3 %>%
group_by(age) %>%
summarize(meany = mean(y),
lwr = quantile(y, .025),
upr = quantile(y, .975)) %>%
ggplot(.,aes(age, meany, ymin=lwr, ymax=upr)) +
geom_ribbon(alpha=0.3, color="black") +
geom_line(color="blue") +
scale_x_continuous(breaks = seq(20, 100, by=5)) +
labs(x = "Age",
y = "Simulated Attitude Toward the Jusifiability of Abortion (with 95% Intervals)")
# Extra credit
# If you choose to do the extra credit, you'll need this package:
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
M4 <- lmer(aj ~ z_age + I(z_age^2) + female + z_ideology + z_satisfinancial + z_cai + z_godimportant + (1 | year),
data = Abortion)
summary(M4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: aj ~ z_age + I(z_age^2) + female + z_ideology + z_satisfinancial +
## z_cai + z_godimportant + (1 | year)
## Data: Abortion
##
## REML criterion at convergence: 43657
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9523 -0.8043 -0.1270 0.6748 3.3040
##
## Random effects:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.1036 0.3219
## Residual 6.5105 2.5516
## Number of obs: 9258, groups: year, 6
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.17455 0.13951 29.922
## z_age 0.02949 0.05837 0.505
## I(z_age^2) -0.34529 0.10927 -3.160
## female 0.23659 0.05372 4.404
## z_ideology -0.88014 0.05512 -15.967
## z_satisfinancial 0.07771 0.05541 1.402
## z_cai 0.99137 0.05733 17.293
## z_godimportant -1.69866 0.05824 -29.164
##
## Correlation of Fixed Effects:
## (Intr) z_age I(_^2) female z_dlgy z_stsf z_cai
## z_age 0.048
## I(z_age^2) -0.187 -0.262
## female -0.197 0.007 -0.006
## z_ideology -0.004 -0.031 -0.026 0.046
## z_satsfnncl 0.003 -0.146 -0.058 0.036 -0.116
## z_cai -0.009 0.046 0.035 -0.020 0.102 -0.012
## z_godmprtnt 0.017 -0.101 0.043 -0.139 -0.169 -0.018 0.314
show_ranef(M4, "year")