1-wvs-usa-abortion.R

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(stevemisc)

## 
## Attaching package: 'stevemisc'

## The following object is masked from 'package:dplyr':
## 
##     tbl_df

Abortion <- readRDS(url("http://posc3410.svmiller.com/toy-data/wvs-usa-abortion.rds"))

# Install necessary packages first (if you don't have them).
# I tried to keep this as minimal as possible, but you'll need some additional packages.
# That's my bad. Sorry.

list.of.packages <- c("broom","arm","lme4","modelr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

# Of note: modelr will be the only one I ask you to directly load right now
# broom and arm will be loaded quietly

library(modelr)


# First, let's get a histogram to see what we're dealing with here...

Abortion %>%
  select(aj) %>% na.omit %>%
  group_by(aj) %>%
  tally() %>%
  ggplot(.,aes(as.factor(aj), n)) + geom_bar(stat="identity") +
  geom_text(aes(label=n), vjust = -.5) +
  labs(x = "Justifiability of Abortion",
       y = "Number of Observations in Each Unique Response Type",
       caption = "Data: World Values Survey, 1981-2011")

# Let's go ahead and treat it as interval...

M1 <- lm(aj ~ age + I(age^2)  + female + ideology + satisfinancial + cai  + godimportant, data=Abortion)
summary(M1)

## 
## Call:
## lm(formula = aj ~ age + I(age^2) + female + ideology + satisfinancial + 
##     cai + godimportant, data = Abortion)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3235 -2.0804 -0.2974  1.7179  8.2837 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7.119e+00  2.204e-01  32.303  < 2e-16 ***
## age             3.969e-02  8.248e-03   4.812 1.52e-06 ***
## I(age^2)       -3.861e-04  8.439e-05  -4.575 4.82e-06 ***
## female          2.352e-01  5.406e-02   4.351 1.37e-05 ***
## ideology       -2.237e-01  1.420e-02 -15.754  < 2e-16 ***
## satisfinancial  5.192e-03  1.131e-02   0.459    0.646    
## cai             4.755e-01  2.540e-02  18.722  < 2e-16 ***
## godimportant   -3.266e-01  1.113e-02 -29.333  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.57 on 9250 degrees of freedom
##   (1129 observations deleted due to missingness)
## Multiple R-squared:  0.2123, Adjusted R-squared:  0.2117 
## F-statistic: 356.1 on 7 and 9250 DF,  p-value: < 2.2e-16

# I like broom::tidy summaries better than straight up summary()
broom::tidy(M1)

## # A tibble: 8 × 5
##   term            estimate std.error statistic   p.value
##   <chr>              <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)     7.12     0.220        32.3   5.26e-217
## 2 age             0.0397   0.00825       4.81  1.52e-  6
## 3 I(age^2)       -0.000386 0.0000844    -4.58  4.82e-  6
## 4 female          0.235    0.0541        4.35  1.37e-  5
## 5 ideology       -0.224    0.0142      -15.8   3.34e- 55
## 6 satisfinancial  0.00519  0.0113        0.459 6.46e-  1
## 7 cai             0.475    0.0254       18.7   8.50e- 77
## 8 godimportant   -0.327    0.0111      -29.3   6.34e-181

# Let's standardize the coefficients now...

Abortion %>%
  mutate_at(vars("age", "ideology", "satisfinancial",
                 "cai", "godimportant"), list(z = ~r2sd(.))) %>%
  rename_at(vars(contains("_z")),
            ~paste("z", gsub("_z", "", .), sep = "_") ) -> Abortion


# Let's get Model 2

M2 <- lm(aj ~ z_age + I(z_age^2)  + female + z_ideology + z_satisfinancial + z_cai + z_godimportant, data=Abortion)
summary(M2)

## 
## Call:
## lm(formula = aj ~ z_age + I(z_age^2) + female + z_ideology + 
##     z_satisfinancial + z_cai + z_godimportant, data = Abortion)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3235 -2.0804 -0.2974  1.7179  8.2837 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.20115    0.04681  89.748  < 2e-16 ***
## z_age             0.15665    0.05697   2.749  0.00598 ** 
## I(z_age^2)       -0.49830    0.10891  -4.575 4.82e-06 ***
## female            0.23522    0.05406   4.351 1.37e-05 ***
## z_ideology       -0.87418    0.05549 -15.754  < 2e-16 ***
## z_satisfinancial  0.02544    0.05541   0.459  0.64612    
## z_cai             1.07173    0.05724  18.722  < 2e-16 ***
## z_godimportant   -1.71138    0.05834 -29.333  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.57 on 9250 degrees of freedom
##   (1129 observations deleted due to missingness)
## Multiple R-squared:  0.2123, Adjusted R-squared:  0.2117 
## F-statistic: 356.1 on 7 and 9250 DF,  p-value: < 2.2e-16

broom::tidy(M2)

## # A tibble: 8 × 5
##   term             estimate std.error statistic   p.value
##   <chr>               <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)        4.20      0.0468    89.7   0        
## 2 z_age              0.157     0.0570     2.75  5.98e-  3
## 3 I(z_age^2)        -0.498     0.109     -4.58  4.82e-  6
## 4 female             0.235     0.0541     4.35  1.37e-  5
## 5 z_ideology        -0.874     0.0555   -15.8   3.34e- 55
## 6 z_satisfinancial   0.0254    0.0554     0.459 6.46e-  1
## 7 z_cai              1.07      0.0572    18.7   8.50e- 77
## 8 z_godimportant    -1.71      0.0583   -29.3   6.34e-181

# Let's keep it easy and look at the effect of age with not standardizing it.
# To be clear: quantities of interest are going to be a case of six-to-one when standardizing or not standardizing it
# However, regression coefficients *will* change. Quantities of interest like these (simulated values of y) *won't*
# But, this will be a little more straightforward for students in seeing the underlying code. So let's roll with it.

M3 <- lm(aj ~ age + I(age^2)  + female + z_ideology + z_satisfinancial + z_cai + z_godimportant, data=Abortion)
summary(M3)

## 
## Call:
## lm(formula = aj ~ age + I(age^2) + female + z_ideology + z_satisfinancial + 
##     z_cai + z_godimportant, data = Abortion)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3235 -2.0804 -0.2974  1.7179  8.2837 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       3.193e+00  1.860e-01  17.168  < 2e-16 ***
## age               3.969e-02  8.248e-03   4.812 1.52e-06 ***
## I(age^2)         -3.861e-04  8.439e-05  -4.575 4.82e-06 ***
## female            2.352e-01  5.406e-02   4.351 1.37e-05 ***
## z_ideology       -8.742e-01  5.549e-02 -15.754  < 2e-16 ***
## z_satisfinancial  2.544e-02  5.541e-02   0.459    0.646    
## z_cai             1.072e+00  5.724e-02  18.722  < 2e-16 ***
## z_godimportant   -1.711e+00  5.834e-02 -29.333  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.57 on 9250 degrees of freedom
##   (1129 observations deleted due to missingness)
## Multiple R-squared:  0.2123, Adjusted R-squared:  0.2117 
## F-statistic: 356.1 on 7 and 9250 DF,  p-value: < 2.2e-16

# Let's create a hypothetical row of new data
# Herein, the respondent is a typical woman of average (technically: median) ideology, financial situation, authoritarian views, and religiosity.
# The only thing that varies is age, which goes from the minimum (17) to maximum (96).

Abortion %>%
  data_grid(.model = M3, age=seq(min(age, na.rm=T), max(age, na.rm=T), by =1),
            aj = 0) -> newdatM3


# Let's get some simulations from the multivariate normal:
simsM3 <- get_sims(M3, newdatM3, 1000, 8675309) 

# Let's add in some identifiers so we better know what we're looking at:
newdatM3 %>%
  dplyr::slice(rep(row_number(), 1000)) %>%
  bind_cols(simsM3, .) -> simsM3


# And let's plot it...
# Grouping by 1000 simulations for each age, let's get the median and 95% bounds around it.
simsM3 %>%
  group_by(age) %>%
  summarize(meany = mean(y),
            lwr = quantile(y, .025),
            upr = quantile(y, .975)) %>%
  ggplot(.,aes(age, meany, ymin=lwr, ymax=upr)) +
  geom_ribbon(alpha=0.3, color="black") +
  geom_line(color="blue") +
  scale_x_continuous(breaks = seq(20, 100, by=5)) +
  labs(x = "Age",
       y = "Simulated Attitude Toward the Jusifiability of Abortion (with 95% Intervals)")

# Extra credit
# If you choose to do the extra credit, you'll need this package:
library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

M4 <- lmer(aj ~ z_age + I(z_age^2)  + female + z_ideology + z_satisfinancial + z_cai + z_godimportant + (1 | year), 
           data = Abortion)
summary(M4)

## Linear mixed model fit by REML ['lmerMod']
## Formula: aj ~ z_age + I(z_age^2) + female + z_ideology + z_satisfinancial +  
##     z_cai + z_godimportant + (1 | year)
##    Data: Abortion
## 
## REML criterion at convergence: 43657
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9523 -0.8043 -0.1270  0.6748  3.3040 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  year     (Intercept) 0.1036   0.3219  
##  Residual             6.5105   2.5516  
## Number of obs: 9258, groups:  year, 6
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       4.17455    0.13951  29.922
## z_age             0.02949    0.05837   0.505
## I(z_age^2)       -0.34529    0.10927  -3.160
## female            0.23659    0.05372   4.404
## z_ideology       -0.88014    0.05512 -15.967
## z_satisfinancial  0.07771    0.05541   1.402
## z_cai             0.99137    0.05733  17.293
## z_godimportant   -1.69866    0.05824 -29.164
## 
## Correlation of Fixed Effects:
##             (Intr) z_age  I(_^2) female z_dlgy z_stsf z_cai 
## z_age        0.048                                          
## I(z_age^2)  -0.187 -0.262                                   
## female      -0.197  0.007 -0.006                            
## z_ideology  -0.004 -0.031 -0.026  0.046                     
## z_satsfnncl  0.003 -0.146 -0.058  0.036 -0.116              
## z_cai       -0.009  0.046  0.035 -0.020  0.102 -0.012       
## z_godmprtnt  0.017 -0.101  0.043 -0.139 -0.169 -0.018  0.314

show_ranef(M4, "year")

1-wvs-usa-abortion.R

steve

2022-01-21