Setting up / loading packages

knitr::opts_chunk$set(echo=TRUE,  message=FALSE, warning=FALSE)
library(tidyverse)
library(tidymodels)
library(ggrepel)

Reading files

# Substitute this for the path where you keep your files
my_home <- "~/Google Drive/My Drive/extracurricular/Presentations/"

# State area measurements from https://www.census.gov/geographies/reference-files/2010/geo/state-area.html 
# State abbreviations from https://about.usps.com/who-we-are/postal-history/state-abbreviations.htm 
state_areas <- read_tsv(paste0(my_home, "state_areas.tsv"), col_names = TRUE, show_col_types = FALSE)
state_abbrev <- read_tsv(paste0(my_home, "state_abbreviations.tsv"), col_names = TRUE, show_col_types = FALSE)

Using tidyverse packages to wrangle and display data

1. What does our data look like?

Check shape, display by a certain order, use specialized functions like glimpse()
# check shape
dim(state_areas)
## [1] 57  7
dim(state_abbrev)
## [1] 52  6
# displaying
head(state_abbrev)
# changing the order of the rows & displaying
state_areas %>% arrange(desc(Name)) %>% head()
# glimpse is another way to see column types
glimpse(state_areas)
## Rows: 57
## Columns: 7
## $ Name              <chr> "United States", "Alabama", "Alaska", "Arizona", "Ar…
## $ Total_Sq_Mi       <dbl> 3796742, 52420, 665384, 113990, 53179, 163695, 10409…
## $ Total_Sq_Km       <dbl> 9833517, 135767, 1723337, 295234, 137732, 423967, 26…
## $ Land_Sq_Mi        <dbl> 3531905, 50645, 570641, 113594, 52035, 155779, 10364…
## $ Land_Sq_Km        <dbl> 9147593, 131171, 1477953, 294207, 134771, 403466, 26…
## $ Water_Total_Sq_Mi <dbl> 264837, 1775, 94743, 396, 1143, 7916, 452, 701, 540,…
## $ Water_Total_Sq_Km <dbl> 685924, 4597, 245383, 1026, 2961, 20501, 1170, 1816,…
Manipulate dataframe using filter(), select(), rename(); add the variables we want with left_join() and mutate()
# we'll come up with a made-up category -- "dry" or "water" states
# based on the total water area / total area
states_pct_water <- state_areas %>%
  filter((Name != "United States") & (Name != "District of Columbia")) %>%
  select(Name, ends_with("Mi")) %>% # keep the area data in miles
  mutate(pct_water = 100 * (Water_Total_Sq_Mi / Total_Sq_Mi), 
         category = ifelse(as.numeric(pct_water) > 20, "water state", "dry state"))
# cleaning up the abbreviations dataframe, renaming the column we're interested in (present abbreviation)
state_abbrev <- state_abbrev %>%
  select(Name, ends_with("present")) %>%
  rename(abbreviation = ends_with("present")) %>% #new name = old name
  mutate(abbreviation = str_sub(abbreviation,1,2)) #make sure abbreviation is two letters
# joining to our water table
states_pct_water <- left_join(states_pct_water, state_abbrev, by = "Name") 
head(states_pct_water)
# see how our percentage calculation & join went -- any NA's or join by wrong key?
library(naniar)
vis_miss(states_pct_water)

# in addition to glimpse, another method from outside the tidyverse is skim
library(skimr)
skim(states_pct_water) 
Data summary
Name states_pct_water
Number of rows 55
Number of columns 7
_______________________
Column type frequency:
character 3
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Name 0 1.00 4 24 0 55 0
category 0 1.00 9 11 0 2 0
abbreviation 4 0.93 2 2 0 51 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Total_Sq_Mi 0 1 69197.49 95173.42 571.00 18318.00 53819.00 82923.5 665384.00 ▇▁▁▁▁
Land_Sq_Mi 0 1 64288.58 84220.46 76.00 16872.50 50645.00 78225.5 570641.00 ▇▁▁▁▁
Water_Total_Sq_Mi 0 1 4908.96 13637.82 192.00 596.00 1494.00 3625.0 94743.00 ▇▁▁▁▁
pct_water 0 1 14.09 21.70 0.24 1.78 4.16 16.5 90.74 ▇▁▁▁▁
# exclude entries that do not have a matching abbreviation
states_pct_water <- filter(states_pct_water, !is.na(abbreviation)) 
head(states_pct_water)

2. Plotting associations between variables

Starting to ask more pointed questions of our data, use group_by() and summarize() to get summary statistics
# grouping and summarizing
states_category_mean_area <- states_pct_water %>%
  group_by(category) %>%
  summarize(mean_total_area = mean(Total_Sq_Mi))

states_category_mean_area
Changing the shape of our dataframes with pivot_longer() and pivot_wider() (formerly spread() and gather())
# pivot_longer (formerly "gather")
# this lets us work with the legend of different types of areas more easily
states_pct_water_long <- states_pct_water %>%
  pivot_longer(cols = ends_with("Mi"), names_to = "area_type", values_to = "area_sq_mi")
head(states_pct_water_long)
A sampling of what we can visualize with ggplot2
# A simple histogram of our made-up metric, "pct_water"
ggplot(data = states_pct_water) + 
  geom_histogram(mapping = aes(x = pct_water))

# what is the overall distribution of areas? differentiate by water/land
# facet_wrap -- somewhat independent plots by category
ggplot(data = states_pct_water_long, mapping = aes(x = area_sq_mi)) + 
  geom_histogram() + 
  facet_wrap(~area_type, scales = "free")

ggplot(data = states_pct_water_long, mapping = aes(x = area_sq_mi)) + 
  geom_density() + 
  facet_wrap(~area_type, scales = "free")

# do our made up "dry states" have less water after all?
# plot together but split by "fill" as opposed to facet_wrap; can't set independent axes in this
ggplot(data = states_pct_water_long, mapping = aes(x = category, y = area_sq_mi, fill = area_type)) +
  geom_violin() +
  scale_y_log10()

ggplot(data = states_pct_water_long, mapping = aes(x = category, y = area_sq_mi)) +
  geom_violin(mapping = aes(fill = area_type)) + 
  geom_boxplot() + # because the fill is not inherited from ggplot() call, boxplots are of overall areas
  geom_jitter(width = 0.05) +
  scale_y_log10()

ggplot(data = states_pct_water_long) + 
  geom_bar(mapping = aes(x = category, y = area_sq_mi, fill = area_type),
           position = "fill", stat = "identity")

3. Customizing plots

Is our metric of %water biased by small, coastal/island states? A simple scatterplot
sp <- ggplot(states_pct_water) +
  geom_point(mapping = aes(x = Total_Sq_Mi, y = pct_water))
sp

Custom scaling, adding titles
# adding a scale to the dots
get_closest_quantile <- function(my_state_area){
  # generate quantiles by 10%
  q <- quantile(states_pct_water$Total_Sq_Mi, probs = seq(0,1,by=0.1))
  # get the index of the closest quantile to my specific state area
  q_idx <- which.min(abs(q - my_state_area))
  # return a number 1-10 (divided by 10 to make a decimal)
  return(q_idx/10)
}

# some baseline dot size + an added scaling factor according to the closest percentile
states_pct_water <- states_pct_water %>%
  mutate(dot_size = 3 + as.numeric(unlist(lapply(Total_Sq_Mi, get_closest_quantile))))

# add title, caption
# in addition to sizing by our new scale, add transparency (alpha)
sp <- ggplot(states_pct_water, aes(x = Total_Sq_Mi, y = pct_water)) +
  geom_point(mapping = aes(color = category, size = dot_size), alpha = 0.5) +
  labs(title = "% of Water Area vs. Total Area of State",
       caption = "Source: US Census Bureau \n census.gov/geographies/reference-files/2010/geo/state-area.html")

sp

Adding a custom theme, more layers with labels
clean_theme <- theme_classic() + # layer our modifications on top of a default ggplot theme
  theme(plot.title = element_text(family = "serif", size = 15, face = 'italic',  hjust = 0.5, vjust = 0.5),
        plot.caption = element_text(family = "serif", size = 8, face = 'bold', hjust = 1, vjust = 0.5),
        axis.title = element_text(family = "serif", size = 10),
        axis.text = element_text(family = "serif", size = 10),
        axis.text.x = element_text(family = "serif", size = 10),
        legend.title = element_text(family = "serif", size = 10),
        legend.text = element_text(family = "serif", size = 10),
        legend.position = "bottom",
        plot.margin = margin(1,2,1,2, "cm")) # top, right, bottom, left margins

sp <- sp + 
  scale_color_manual(values = c("firebrick4", "deepskyblue3")) + # adjust the color legend
  scale_size(guide="none") + # hide the size legend
  xlab("Total Area (sq. mi)") + 
  ylab("Water % of State Area") + 
  scale_x_log10() +
  clean_theme

sp + 
  geom_text(data = filter(states_pct_water, pct_water > 5), #subsetting to avoid crowding 
            mapping = aes(label = abbreviation), hjust = 0, vjust = -0.5, size = 3) 

# alternative to geom_text, ggrepel deals with crowding labels
sp + 
  ylim(-12,50) + 
  geom_text_repel(mapping = aes(label = Name),
                  size = 2.5, max.overlaps = 50, force_pull = 0.01)

Statistics with tidymodels, other tidyverse extensions

Overview of a second, independent dataset– this time using skim()

# we'll bring in one of the default R datasets -- us_rent_income; see data()
skim(us_rent_income)
Data summary
Name us_rent_income
Number of rows 104
Number of columns 5
_______________________
Column type frequency:
character 3
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
GEOID 0 1 2 2 0 52 0
NAME 0 1 4 20 0 52 0
variable 0 1 4 6 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
estimate 1 0.99 14923.08 14459.89 464 864.5 1507 28872.0 43198 ▇▁▂▆▁
moe 1 0.99 95.50 119.74 2 4.5 18 153.5 681 ▇▃▁▁▁

How well do the new variables correlate with our land/water area data?

# merge our state water data with rent/income data
all_state_data <- us_rent_income %>%
  rename(Name = NAME) %>%
  select(-moe, -GEOID) %>%
  pivot_wider(names_from = variable, values_from = estimate) %>%
  left_join(states_pct_water, by = "Name") %>% 
  na.omit()

head(all_state_data)
library(GGally)
# can also get nice plots using ggpairs
all_num_state_data <- all_state_data %>% select(-dot_size) %>% select_if(is.double)
ggpairs(all_num_state_data)

library(corrr)
# can easily get correlation tables and plots using corrr from tidymodels
all_num_state_data_cor <- all_num_state_data %>% correlate() %>% rearrange() %>% shave()

fashion(all_num_state_data_cor) # text display in a nice format
rplot(all_num_state_data_cor) # circle plot with correlation magnitude

# can also make other fancier correlation plots
all_num_state_data %>%
  correlate() %>% 
  network_plot(min_cor = .1)

  • unsurprisingly, rent/income are correlated to each other; land/total square miles are in turn correlated to each other

    • i.e. our two separate datasets have (mostly) nothing to do with each other
  • pct_water seems to be behaving weirdly: correlates better with rent than with total water sq. miles

  • we can use our somewhat orthogonal variables to reduce dimensions using PCA– see which states are more similar to each other

    • we expect PC1 and PC2 to reflect rent/income and land or water area (respectively); and in turn we expect pct_water to not explain much
    • can check this hypothesis from the loadings

What principal components explain differences between states?

Run PCA
# first make a matrix with states as row names
states_mat <- as.data.frame(all_num_state_data)
rownames(states_mat) <- all_state_data$Name

# run PCA
states_pca <- prcomp(states_mat, center = TRUE, scale = TRUE)

# what is the variance explained by each PC? individual + cumulative
states_pca_var <- data.frame(
  PC = seq(1, length(states_pca$sdev)), 
  stdev = states_pca$sdev,
  var = (states_pca$sdev)^2,
  var_pct = cumsum((states_pca$sdev)^2 / sum((states_pca$sdev)^2))
)

ggplot(states_pca_var) + 
  geom_line(mapping = aes(x = PC, y = var)) + 
  geom_point(mapping = aes(x = PC, y = var), size = 3, shape = 23, fill = "goldenrod") + 
  scale_x_continuous(labels = min(states_pca_var$PC):max(states_pca_var$PC), 
                     breaks = min(states_pca_var$PC):max(states_pca_var$PC)) +
  xlab("Principal Component") + 
  ylab("Variance explained") + 
  clean_theme

ggplot(states_pca_var) + 
  geom_line(mapping = aes(x = PC, y = var_pct)) + 
  geom_point(mapping = aes(x = PC, y = var_pct), size = 3, shape = 21, fill = "lawngreen") + 
  scale_x_continuous(labels = min(states_pca_var$PC):max(states_pca_var$PC), 
                     breaks = min(states_pca_var$PC):max(states_pca_var$PC)) +
  xlab("Principal Component") + 
  ylab("Cumulative % variance explained") + 
  clean_theme

Examine principal components and loadings
states_pca_df <- as.data.frame(states_pca$x) %>%
  mutate(Name = rownames(states_mat)) %>%
  select(Name, PC1, PC2) %>% 
  left_join(states_pct_water, by = "Name")

ggplot(states_pca_df, mapping = aes(x = PC1, y = PC2)) + 
  geom_point(mapping = aes(color = category)) + 
  geom_text_repel(mapping = aes(label = abbreviation), max.overlaps = 50) + 
  theme_bw()

library(pheatmap)
# pheatmap is another quick way to visualize your numerical tables
pheatmap(states_pca$rotation, cluster_cols = FALSE, cluster_rows = FALSE)

K-means clustering (code adapted from https://www.tidymodels.org/learn/statistics/k-means/)

state_data_pca_mat <- states_pca_df %>% select(PC1, PC2)
rownames(state_data_pca_mat) <- states_pca_df$Name

state_data_kclust2 <- kmeans(state_data_pca_mat, centers = 2)
summary(state_data_kclust2)
##              Length Class  Mode   
## cluster      50     -none- numeric
## centers       4     -none- numeric
## totss         1     -none- numeric
## withinss      2     -none- numeric
## tot.withinss  1     -none- numeric
## betweenss     1     -none- numeric
## size          2     -none- numeric
## iter          1     -none- numeric
## ifault        1     -none- numeric
state_data_kclust2_aug <- augment(state_data_kclust2, state_data_pca_mat)
state_data_kclust2_aug
state_data_kclust2_aug$.cluster
##        Alabama         Alaska        Arizona       Arkansas     California 
##              2              1              2              2              1 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              1              1              1              1              2 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              1              2              2              2              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              2              2              2              2              1 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              1              1              1              2              2 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              2              1              1 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              2              1              2              2              2 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              2              2              2              1              2 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              2              2              2              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              1              1              2              2              2 
## Levels: 1 2
# not incredibly informative, let's try iterating through possible values of k
state_data_kclust_iter <- 
  tibble(k = 1:6) %>%
  mutate(
    kclust = map(k, ~kmeans(state_data_pca_mat, .x)),
    tidied = map(kclust, tidy), # turn into a tibble
    glanced = map(kclust, glance), # summary "glance"
    augmented = map(kclust, augment, state_data_pca_mat) # augment our original data with k-means clusters
  )
# we have lists of tibbles, each corresponding to a k
state_data_kclust_iter
# the augmented list contains the data we want, again-- one tibble per value of k
state_data_kclust_iter$augmented
## [[1]]
## # A tibble: 50 × 4
##    .rownames      PC1    PC2 .cluster
##    <chr>        <dbl>  <dbl> <fct>   
##  1 Alabama     -0.859  1.40  1       
##  2 Alaska      10.6    1.07  1       
##  3 Arizona      0.209  0.709 1       
##  4 Arkansas    -0.957  1.69  1       
##  5 California   1.62  -0.728 1       
##  6 Colorado     0.489 -0.585 1       
##  7 Connecticut -0.451 -2.16  1       
##  8 Delaware    -0.632 -1.89  1       
##  9 Florida      0.219 -0.521 1       
## 10 Georgia     -0.452  0.499 1       
## # … with 40 more rows
## 
## [[2]]
## # A tibble: 50 × 4
##    .rownames      PC1    PC2 .cluster
##    <chr>        <dbl>  <dbl> <fct>   
##  1 Alabama     -0.859  1.40  2       
##  2 Alaska      10.6    1.07  1       
##  3 Arizona      0.209  0.709 2       
##  4 Arkansas    -0.957  1.69  2       
##  5 California   1.62  -0.728 1       
##  6 Colorado     0.489 -0.585 1       
##  7 Connecticut -0.451 -2.16  1       
##  8 Delaware    -0.632 -1.89  1       
##  9 Florida      0.219 -0.521 1       
## 10 Georgia     -0.452  0.499 2       
## # … with 40 more rows
## 
## [[3]]
## # A tibble: 50 × 4
##    .rownames      PC1    PC2 .cluster
##    <chr>        <dbl>  <dbl> <fct>   
##  1 Alabama     -0.859  1.40  3       
##  2 Alaska      10.6    1.07  1       
##  3 Arizona      0.209  0.709 3       
##  4 Arkansas    -0.957  1.69  3       
##  5 California   1.62  -0.728 2       
##  6 Colorado     0.489 -0.585 2       
##  7 Connecticut -0.451 -2.16  2       
##  8 Delaware    -0.632 -1.89  2       
##  9 Florida      0.219 -0.521 2       
## 10 Georgia     -0.452  0.499 3       
## # … with 40 more rows
## 
## [[4]]
## # A tibble: 50 × 4
##    .rownames      PC1    PC2 .cluster
##    <chr>        <dbl>  <dbl> <fct>   
##  1 Alabama     -0.859  1.40  4       
##  2 Alaska      10.6    1.07  2       
##  3 Arizona      0.209  0.709 1       
##  4 Arkansas    -0.957  1.69  4       
##  5 California   1.62  -0.728 1       
##  6 Colorado     0.489 -0.585 1       
##  7 Connecticut -0.451 -2.16  3       
##  8 Delaware    -0.632 -1.89  3       
##  9 Florida      0.219 -0.521 1       
## 10 Georgia     -0.452  0.499 4       
## # … with 40 more rows
## 
## [[5]]
## # A tibble: 50 × 4
##    .rownames      PC1    PC2 .cluster
##    <chr>        <dbl>  <dbl> <fct>   
##  1 Alabama     -0.859  1.40  1       
##  2 Alaska      10.6    1.07  2       
##  3 Arizona      0.209  0.709 3       
##  4 Arkansas    -0.957  1.69  1       
##  5 California   1.62  -0.728 5       
##  6 Colorado     0.489 -0.585 5       
##  7 Connecticut -0.451 -2.16  4       
##  8 Delaware    -0.632 -1.89  4       
##  9 Florida      0.219 -0.521 5       
## 10 Georgia     -0.452  0.499 3       
## # … with 40 more rows
## 
## [[6]]
## # A tibble: 50 × 4
##    .rownames      PC1    PC2 .cluster
##    <chr>        <dbl>  <dbl> <fct>   
##  1 Alabama     -0.859  1.40  6       
##  2 Alaska      10.6    1.07  4       
##  3 Arizona      0.209  0.709 5       
##  4 Arkansas    -0.957  1.69  6       
##  5 California   1.62  -0.728 3       
##  6 Colorado     0.489 -0.585 3       
##  7 Connecticut -0.451 -2.16  2       
##  8 Delaware    -0.632 -1.89  2       
##  9 Florida      0.219 -0.521 3       
## 10 Georgia     -0.452  0.499 5       
## # … with 40 more rows
# now plot each tibble per k, with PC1 and PC2 as the coordinates
state_data_kclust_iter_df <- unnest(state_data_kclust_iter, cols = c(augmented))
state_data_kclust_iter_df$.rownames <- state_abbrev$abbreviation[match(state_data_kclust_iter_df$.rownames, state_abbrev$Name)]

ggplot(data = state_data_kclust_iter_df, 
       mapping = aes(x = PC1, y = PC2)) +
  geom_point(mapping = aes(color = .cluster), alpha = 0.6, size = 4, shape = 21) + 
  geom_text(mapping = aes(label = .rownames), alpha = 0.8, size = 2) +
  facet_wrap(~k) + theme_minimal()

My session info

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] pheatmap_1.0.12    corrr_0.4.3        GGally_2.1.2       skimr_2.1.3       
##  [5] naniar_0.6.1       ggrepel_0.9.1      yardstick_0.0.9    workflowsets_0.1.0
##  [9] workflows_0.2.4    tune_0.1.6         rsample_0.1.1      recipes_0.1.17    
## [13] parsnip_0.1.7      modeldata_0.1.1    infer_1.0.0        dials_0.1.0       
## [17] scales_1.1.1       broom_0.7.9        tidymodels_0.1.4   forcats_0.5.1     
## [21] stringr_1.4.0      dplyr_1.0.7        purrr_0.3.4        readr_2.0.1       
## [25] tidyr_1.2.0        tibble_3.1.6       ggplot2_3.3.5      tidyverse_1.3.1   
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-2   ellipsis_0.3.2     class_7.3-19       visdat_0.5.3      
##  [5] base64enc_0.1-3    fs_1.5.0           rstudioapi_0.13    farver_2.1.0      
##  [9] listenv_0.8.0      furrr_0.2.3        bit64_4.0.5        prodlim_2019.11.13
## [13] fansi_0.5.0        lubridate_1.7.10   xml2_1.3.2         codetools_0.2-18  
## [17] splines_4.1.1      knitr_1.34         jsonlite_1.7.2     pROC_1.18.0       
## [21] dbplyr_2.1.1       compiler_4.1.1     httr_1.4.2         backports_1.2.1   
## [25] assertthat_0.2.1   Matrix_1.3-4       fastmap_1.1.0      cli_3.0.1         
## [29] htmltools_0.5.2    tools_4.1.1        gtable_0.3.0       glue_1.4.2        
## [33] Rcpp_1.0.7         cellranger_1.1.0   jquerylib_0.1.4    DiceDesign_1.9    
## [37] vctrs_0.3.8        iterators_1.0.14   timeDate_3043.102  gower_1.0.0       
## [41] xfun_0.26          globals_0.14.0     rvest_1.0.1        lifecycle_1.0.1   
## [45] future_1.23.0      MASS_7.3-54        TSP_1.1-11         ipred_0.9-12      
## [49] vroom_1.5.5        hms_1.1.0          parallel_4.1.1     RColorBrewer_1.1-2
## [53] yaml_2.2.1         rpart_4.1-15       reshape_0.8.8      stringi_1.7.4     
## [57] highr_0.9          foreach_1.5.2      seriation_1.3.1    lhs_1.1.3         
## [61] hardhat_0.2.0      lava_1.6.10        repr_1.1.4         rlang_0.4.11      
## [65] pkgconfig_2.0.3    evaluate_0.14      lattice_0.20-44    labeling_0.4.2    
## [69] bit_4.0.4          tidyselect_1.1.1   parallelly_1.30.0  plyr_1.8.6        
## [73] magrittr_2.0.1     R6_2.5.1           generics_0.1.0     DBI_1.1.1         
## [77] pillar_1.6.2       haven_2.4.3        withr_2.4.2        survival_3.2-11   
## [81] nnet_7.3-16        future.apply_1.8.1 modelr_0.1.8       crayon_1.4.1      
## [85] utf8_1.2.2         tzdb_0.1.2         rmarkdown_2.11     grid_4.1.1        
## [89] readxl_1.3.1       reprex_2.0.1       digest_0.6.28      munsell_0.5.0     
## [93] GPfit_1.0-8        registry_0.5-1
save.image(file = paste0(my_home, "R_tidyverse_data_science.RData"))