Aliases: augment.lm
Keywords:
### ** Examples
library(ggplot2)
library(dplyr)
mod <- lm(mpg ~ wt + qsec, data = mtcars)
tidy(mod)
# A tibble: 3 × 5 term estimate std.error statistic p.value <chr> <dbl> <dbl> <dbl> <dbl> 1 (Intercept) 19.7 5.25 3.76 7.65e- 4 2 wt -5.05 0.484 -10.4 2.52e-11 3 qsec 0.929 0.265 3.51 1.50e- 3
glance(mod)
# A tibble: 1 × 12 r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 0.826 0.814 2.60 69.0 9.39e-12 2 -74.4 157. 163. # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# coefficient plot d <- tidy(mod, conf.int = TRUE) ggplot(d, aes(estimate, term, xmin = conf.low, xmax = conf.high, height = 0)) + geom_point() + geom_vline(xintercept = 0, lty = 4) + geom_errorbarh()
# aside: There are tidy() and glance() methods for summary.lm objects too.
# this can be useful when you want to conserve memory by converting large lm
# objects into their leaner summary.lm equivalents.
s <- summary(mod)
tidy(s, conf.int = TRUE)
# A tibble: 3 × 7 term estimate std.error statistic p.value conf.low conf.high <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 (Intercept) 19.7 5.25 3.76 7.65e- 4 9.00 30.5 2 wt -5.05 0.484 -10.4 2.52e-11 -6.04 -4.06 3 qsec 0.929 0.265 3.51 1.50e- 3 0.387 1.47
glance(s)
# A tibble: 1 × 8 r.squared adj.r.squared sigma statistic p.value df df.residual nobs <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> 1 0.826 0.814 2.60 69.0 9.39e-12 2 29 32
augment(mod)
# A tibble: 32 × 10 .rownames mpg wt qsec .fitted .resid .hat .sigma .cooksd .std.resid <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 Mazda RX4 21 2.62 16.5 21.8 -0.815 0.0693 2.64 2.63e-3 -0.325 2 Mazda RX4… 21 2.88 17.0 21.0 -0.0482 0.0444 2.64 5.59e-6 -0.0190 3 Datsun 710 22.8 2.32 18.6 25.3 -2.53 0.0607 2.60 2.17e-2 -1.00 4 Hornet 4 … 21.4 3.22 19.4 21.6 -0.181 0.0576 2.64 1.05e-4 -0.0716 5 Hornet Sp… 18.7 3.44 17.0 18.2 0.504 0.0389 2.64 5.29e-4 0.198 6 Valiant 18.1 3.46 20.2 21.1 -2.97 0.0957 2.58 5.10e-2 -1.20 7 Duster 360 14.3 3.57 15.8 16.4 -2.14 0.0729 2.61 1.93e-2 -0.857 8 Merc 240D 24.4 3.19 20 22.2 2.17 0.0791 2.61 2.18e-2 0.872 9 Merc 230 22.8 3.15 22.9 25.1 -2.32 0.295 2.59 1.59e-1 -1.07 10 Merc 280 19.2 3.44 18.3 19.4 -0.185 0.0358 2.64 6.55e-5 -0.0728 # … with 22 more rows
augment(mod, mtcars, interval = "confidence")
# A tibble: 32 × 20 .rownames mpg cyl disp hp drat wt qsec vs am gear carb <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4 4 2 Mazda RX4 … 21 6 160 110 3.9 2.88 17.0 0 1 4 4 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 4 Hornet 4 D… 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 5 Hornet Spo… 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 6 Valiant 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 7 Duster 360 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 8 Merc 240D 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 9 Merc 230 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 10 Merc 280 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 # … with 22 more rows, and 8 more variables: .fitted <dbl>, .lower <dbl>, # .upper <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, # .std.resid <dbl>
# predict on new data newdata <- mtcars %>% head(6) %>% mutate(wt = wt + 1) augment(mod, newdata = newdata)
# A tibble: 6 × 14 .rownames mpg cyl disp hp drat wt qsec vs am gear carb <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 Mazda RX4 21 6 160 110 3.9 3.62 16.5 0 1 4 4 2 Mazda RX4 W… 21 6 160 110 3.9 3.88 17.0 0 1 4 4 3 Datsun 710 22.8 4 108 93 3.85 3.32 18.6 1 1 4 1 4 Hornet 4 Dr… 21.4 6 258 110 3.08 4.22 19.4 1 0 3 1 5 Hornet Spor… 18.7 8 360 175 3.15 4.44 17.0 0 0 3 2 6 Valiant 18.1 6 225 105 2.76 4.46 20.2 1 0 3 1 # … with 2 more variables: .fitted <dbl>, .resid <dbl>
# ggplot2 example where we also construct 95% prediction interval # simpler bivariate model since we're plotting in 2D mod2 <- lm(mpg ~ wt, data = mtcars) au <- augment(mod2, newdata = newdata, interval = "prediction") ggplot(au, aes(wt, mpg)) + geom_point() + geom_line(aes(y = .fitted)) + geom_ribbon(aes(ymin = .lower, ymax = .upper), col = NA, alpha = 0.3)
# predict on new data without outcome variable. Output does not include .resid newdata <- newdata %>% select(-mpg) augment(mod, newdata = newdata)
Warning: 'newdata' had 6 rows but variables found have 234 rows
# A tibble: 6 × 12 .rownames cyl disp hp drat wt qsec vs am gear carb .fitted <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 Mazda RX4 6 160 110 3.9 3.62 16.5 0 1 4 4 16.8 2 Mazda RX4… 6 160 110 3.9 3.88 17.0 0 1 4 4 16.0 3 Datsun 710 4 108 93 3.85 3.32 18.6 1 1 4 1 20.3 4 Hornet 4 … 6 258 110 3.08 4.22 19.4 1 0 3 1 16.5 5 Hornet Sp… 8 360 175 3.15 4.44 17.0 0 0 3 2 13.1 6 Valiant 6 225 105 2.76 4.46 20.2 1 0 3 1 16.0
au <- augment(mod, data = mtcars) ggplot(au, aes(.hat, .std.resid)) + geom_vline(size = 2, colour = "white", xintercept = 0) + geom_hline(size = 2, colour = "white", yintercept = 0) + geom_point() + geom_smooth(se = FALSE)
plot(mod, which = 6)
ggplot(au, aes(.hat, .cooksd)) + geom_vline(xintercept = 0, colour = NA) + geom_abline(slope = seq(0, 3, by = 0.5), colour = "white") + geom_smooth(se = FALSE) + geom_point()
# column-wise models a <- matrix(rnorm(20), nrow = 10) b <- a + rnorm(length(a)) result <- lm(b ~ a) tidy(result)
# A tibble: 6 × 6 response term estimate std.error statistic p.value <chr> <chr> <dbl> <dbl> <dbl> <dbl> 1 Y1 (Intercept) 0.773 0.276 2.80 0.0265 2 Y1 a1 1.67 0.312 5.33 0.00109 3 Y1 a2 -0.458 0.364 -1.26 0.250 4 Y2 (Intercept) -0.784 0.346 -2.27 0.0576 5 Y2 a1 -0.413 0.392 -1.06 0.326 6 Y2 a2 0.842 0.457 1.84 0.108