library(tidyverse)
library(sf)
library(units)
library(leaflet)
library(readxl)
library(ggplot2)

1.1

USAboundaries::us_states
## function (map_date = NULL, resolution = c("low", "high"), states = NULL) 
## {
##     resolution <- match.arg(resolution)
##     if (is.null(map_date)) {
##         if (resolution == "low") {
##             shp <- USAboundaries::states_contemporary_lores
##         }
##         else if (resolution == "high") {
##             check_data_package()
##             shp <- USAboundariesData::states_contemporary_hires
##         }
##         shp <- filter_by_states(shp, states)
##     }
##     else {
##         map_date <- as.Date(map_date)
##         stopifnot(as.Date("1783-09-03") <= map_date, map_date <= 
##             as.Date("2000-12-31"))
##         check_data_package()
##         if (resolution == "low") {
##             shp <- USAboundariesData::states_historical_lores
##         }
##         else if (resolution == "high") {
##             shp <- USAboundariesData::states_historical_hires
##         }
##         shp <- filter_by_date(shp, map_date)
##         shp <- filter_by_states(shp, states)
##     }
##     shp
## }
## <bytecode: 0x0000000019279990>
## <environment: namespace:USAboundaries>
conus = USAboundaries::us_states() %>%
  filter(!state_name %in% c("Puerto Rico",
                            "Alaska",
                            "Hawaii"))

counties = USAboundaries::us_counties() %>%
  st_as_sf(counties) %>%
  filter(!state_name %in% c("Puerto Rico",
                            "Alaska",
                            "Hawaii")) %>%
  st_transform(counties, crs = 5070)

1.2

county_centroid = st_centroid(counties) %>%
  st_union()

1.3

# voronoi tessellation
vor_tess = st_voronoi(county_centroid) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())

# triangulated tessellation
tri_tess = st_triangulate(county_centroid) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())

# gridded coverage
gridded_counties = st_make_grid(county_centroid, n = 70, square = T) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())

# hexagonal coverage
hex_counties = st_make_grid(county_centroid, n=70, square = F) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())

1.4

vor_tess = st_intersection(vor_tess, st_union(counties))
tri_tess = st_intersection(tri_tess, st_union(counties))
gridded_counties = st_intersection(gridded_counties, st_union(counties))
hex_counties = st_intersection(hex_counties, st_union(counties))

1.5

counties_simp = rmapshaper::ms_simplify(counties, keep = .005)
## Registered S3 method overwritten by 'geojsonlint':
##   method         from 
##   print.location dplyr
mapview::npts(counties)
## [1] 51976
mapview::npts(counties_simp)
## [1] 21023

Just over 50% of the points were removed. This makes the edge of the border less accurate, but makes the computations faster.

1.6

plot_tess = function(sf_obj, title){
  ggplot()+
    geom_sf(data = sf_obj, fill = "white", col = "navy", size = .2)+
    theme_void() +
    labs(title = title, caption = paste("This tessellation has:", nrow(sf_obj), "features." ))
  }

1.7

plot_tess(vor_tess, "Voronoi Tessellation")

plot_tess(tri_tess, "Triangulated Tessellation")

plot_tess(gridded_counties, "Gridded Coverage")

plot_tess(hex_counties, "Hexagonal Coverage")

plot_tess(counties_simp, "Original")

Question 2

2.1

tess_sum = function(sf_obj, text){

  area = st_area(sf_obj) %>%
  units::set_units("km^2") %>%
  units::drop_units()

data.frame(num_features = nrow(sf_obj),
           mean_area = mean(area),
           sd_area = sd(area),
           total_area = sum(area),
           text = text
)


}

2.2

vor_sum_tess = tess_sum(vor_tess, "Voronoi")
tri_sum_tess = tess_sum(tri_tess, "Triangulation")
hex_sum = tess_sum(hex_counties, "Hexagon")
grid_sum = tess_sum(gridded_counties, "Grid")

2.3

summary_tess = bind_rows(
  tess_sum(counties_simp, "Original Counties"),
  tess_sum(tri_tess, "Triangulation"),
  tess_sum(vor_tess, "Voronoi"),
  tess_sum(hex_counties, "Hexagon"),
  tess_sum(gridded_counties, "Grid")
)

2.4

knitr::kable(summary_tess,
             caption = "Five Tessellation summaries",
             col.names = c("Number of Features", "Mean Area", "Standard Deviation of Features", "Total Area", "Text"),
             format.args = list(big.marks = ","))
Five Tessellation summaries
Number of Features Mean Area Standard Deviation of Features Total Area Text
3069 2539.396 3455.0175 7793407 Original Counties
6196 1251.808 1575.7996 7756200 Triangulation
3108 2521.745 2887.1397 7837583 Voronoi
1639 3494.238 382.4362 5727056 Hexagon
2013 2497.477 275.0625 5027422 Grid

2.5

For each of the tessellations, the area and number of polygons changed. This will cause the MAUP to change with the tessellations. The triangulation tessellation had the most number of polygons, and the hexagonal coverage had the about half the number of polygons. The total area was largest with the gridded coverage tessellation.

Question 3

3.1

dams <- read_excel("NID2019_U.xlsx")

dams_sf = dams %>%
  filter(!is.na(LONGITUDE), !is.na(LATITUDE)) %>%
  st_as_sf(coords = c("LONGITUDE","LATITUDE"), crs = 4326) %>%
  st_transform(5070)

3.2

pip_func = function(points, polygon, id){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(get('id')) %>%
    setNames(c(id, "n")) %>%
    left_join(polygon, by = id) %>%
    st_as_sf()

}

#### second function for the original data because the id column is "geoid" and the function was only taking count(get("id)) with id in quotes ####


pip_func_county_simp = function(points, polygon, id){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(get('geoid')) %>%
    setNames(c(id, "n")) %>%
    left_join(polygon, by = id) %>%
    st_as_sf()

}

3.3

vor_pip = pip_func(dams_sf, vor_tess, "id")
tri_pip = pip_func(dams_sf, tri_tess, "id")
gridded_pip = pip_func(dams_sf, gridded_counties, "id")
hex_pip = pip_func(dams_sf, hex_counties, "id")
counties_simp_pip = pip_func_county_simp(dams_sf, counties_simp, "geoid")

3.4

plot_pip = function(sf_obj, title){
  ggplot()+
    geom_sf(data = sf_obj, aes(fill = log(n)))+
    scale_fill_viridis_c()+
    theme_void() +
    labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))
}

3.5

plot_pip(vor_pip, "Voronoi")

plot_pip(tri_pip, "triangulation")

plot_pip(gridded_pip, "gridded")

plot_pip(hex_pip, "hexagonal")

plot_pip(counties_simp_pip, "original")

3.6

The more polygons each tessellation had, the higher the amount of dams there were in each section. Moving forward, we will be using the hexagonal coverage tessellation.

4.1

dam_freq = strsplit(dams_sf$PURPOSES, split = "") %>%
     unlist() %>%
     table() %>%
     as.data.frame() %>%
     setNames(c("abbr", "count"))



r_grep = grepl("R", dams_sf$PURPOSES[1:20])
s_grep = grepl("S", dams_sf$PURPOSES[1:20])
h_grep = grepl("H", dams_sf$PURPOSES[1:20])
d_grep = grepl("D", dams_sf$PURPOSES[1:20])

4.2

plot_pip = function(sf_obj, title){
  ggplot()+
    geom_sf(data = sf_obj, aes(fill = log(n)))+
    scale_fill_viridis_c()+
    theme_void() +
    labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))+
    gghighlight::gghighlight(n>=(mean(n)+sd(n)))
}


rec = dams_sf %>% 
  filter(grepl("R", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))


s = dams_sf %>% 
  filter(grepl("S", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))


h = dams_sf %>% 
  filter(grepl("H", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))


d = dams_sf %>% 
  filter(grepl("D", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))

rec

s

h

d