library(tidyverse)
library(sf)
library(units)
library(leaflet)
library(readxl)
library(ggplot2)
USAboundaries::us_states
## function (map_date = NULL, resolution = c("low", "high"), states = NULL)
## {
## resolution <- match.arg(resolution)
## if (is.null(map_date)) {
## if (resolution == "low") {
## shp <- USAboundaries::states_contemporary_lores
## }
## else if (resolution == "high") {
## check_data_package()
## shp <- USAboundariesData::states_contemporary_hires
## }
## shp <- filter_by_states(shp, states)
## }
## else {
## map_date <- as.Date(map_date)
## stopifnot(as.Date("1783-09-03") <= map_date, map_date <=
## as.Date("2000-12-31"))
## check_data_package()
## if (resolution == "low") {
## shp <- USAboundariesData::states_historical_lores
## }
## else if (resolution == "high") {
## shp <- USAboundariesData::states_historical_hires
## }
## shp <- filter_by_date(shp, map_date)
## shp <- filter_by_states(shp, states)
## }
## shp
## }
## <bytecode: 0x0000000019279990>
## <environment: namespace:USAboundaries>
conus = USAboundaries::us_states() %>%
filter(!state_name %in% c("Puerto Rico",
"Alaska",
"Hawaii"))
counties = USAboundaries::us_counties() %>%
st_as_sf(counties) %>%
filter(!state_name %in% c("Puerto Rico",
"Alaska",
"Hawaii")) %>%
st_transform(counties, crs = 5070)
county_centroid = st_centroid(counties) %>%
st_union()
# voronoi tessellation
vor_tess = st_voronoi(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
# triangulated tessellation
tri_tess = st_triangulate(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
# gridded coverage
gridded_counties = st_make_grid(county_centroid, n = 70, square = T) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
# hexagonal coverage
hex_counties = st_make_grid(county_centroid, n=70, square = F) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
vor_tess = st_intersection(vor_tess, st_union(counties))
tri_tess = st_intersection(tri_tess, st_union(counties))
gridded_counties = st_intersection(gridded_counties, st_union(counties))
hex_counties = st_intersection(hex_counties, st_union(counties))
counties_simp = rmapshaper::ms_simplify(counties, keep = .005)
## Registered S3 method overwritten by 'geojsonlint':
## method from
## print.location dplyr
mapview::npts(counties)
## [1] 51976
mapview::npts(counties_simp)
## [1] 21023
Just over 50% of the points were removed. This makes the edge of the border less accurate, but makes the computations faster.
plot_tess = function(sf_obj, title){
ggplot()+
geom_sf(data = sf_obj, fill = "white", col = "navy", size = .2)+
theme_void() +
labs(title = title, caption = paste("This tessellation has:", nrow(sf_obj), "features." ))
}
plot_tess(vor_tess, "Voronoi Tessellation")
plot_tess(tri_tess, "Triangulated Tessellation")
plot_tess(gridded_counties, "Gridded Coverage")
plot_tess(hex_counties, "Hexagonal Coverage")
plot_tess(counties_simp, "Original")
tess_sum = function(sf_obj, text){
area = st_area(sf_obj) %>%
units::set_units("km^2") %>%
units::drop_units()
data.frame(num_features = nrow(sf_obj),
mean_area = mean(area),
sd_area = sd(area),
total_area = sum(area),
text = text
)
}
vor_sum_tess = tess_sum(vor_tess, "Voronoi")
tri_sum_tess = tess_sum(tri_tess, "Triangulation")
hex_sum = tess_sum(hex_counties, "Hexagon")
grid_sum = tess_sum(gridded_counties, "Grid")
summary_tess = bind_rows(
tess_sum(counties_simp, "Original Counties"),
tess_sum(tri_tess, "Triangulation"),
tess_sum(vor_tess, "Voronoi"),
tess_sum(hex_counties, "Hexagon"),
tess_sum(gridded_counties, "Grid")
)
knitr::kable(summary_tess,
caption = "Five Tessellation summaries",
col.names = c("Number of Features", "Mean Area", "Standard Deviation of Features", "Total Area", "Text"),
format.args = list(big.marks = ","))
Number of Features | Mean Area | Standard Deviation of Features | Total Area | Text |
---|---|---|---|---|
3069 | 2539.396 | 3455.0175 | 7793407 | Original Counties |
6196 | 1251.808 | 1575.7996 | 7756200 | Triangulation |
3108 | 2521.745 | 2887.1397 | 7837583 | Voronoi |
1639 | 3494.238 | 382.4362 | 5727056 | Hexagon |
2013 | 2497.477 | 275.0625 | 5027422 | Grid |
For each of the tessellations, the area and number of polygons changed. This will cause the MAUP to change with the tessellations. The triangulation tessellation had the most number of polygons, and the hexagonal coverage had the about half the number of polygons. The total area was largest with the gridded coverage tessellation.
dams <- read_excel("NID2019_U.xlsx")
dams_sf = dams %>%
filter(!is.na(LONGITUDE), !is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE","LATITUDE"), crs = 4326) %>%
st_transform(5070)
pip_func = function(points, polygon, id){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get('id')) %>%
setNames(c(id, "n")) %>%
left_join(polygon, by = id) %>%
st_as_sf()
}
#### second function for the original data because the id column is "geoid" and the function was only taking count(get("id)) with id in quotes ####
pip_func_county_simp = function(points, polygon, id){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get('geoid')) %>%
setNames(c(id, "n")) %>%
left_join(polygon, by = id) %>%
st_as_sf()
}
vor_pip = pip_func(dams_sf, vor_tess, "id")
tri_pip = pip_func(dams_sf, tri_tess, "id")
gridded_pip = pip_func(dams_sf, gridded_counties, "id")
hex_pip = pip_func(dams_sf, hex_counties, "id")
counties_simp_pip = pip_func_county_simp(dams_sf, counties_simp, "geoid")
plot_pip = function(sf_obj, title){
ggplot()+
geom_sf(data = sf_obj, aes(fill = log(n)))+
scale_fill_viridis_c()+
theme_void() +
labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))
}
plot_pip(vor_pip, "Voronoi")
plot_pip(tri_pip, "triangulation")
plot_pip(gridded_pip, "gridded")
plot_pip(hex_pip, "hexagonal")
plot_pip(counties_simp_pip, "original")
The more polygons each tessellation had, the higher the amount of dams there were in each section. Moving forward, we will be using the hexagonal coverage tessellation.
dam_freq = strsplit(dams_sf$PURPOSES, split = "") %>%
unlist() %>%
table() %>%
as.data.frame() %>%
setNames(c("abbr", "count"))
r_grep = grepl("R", dams_sf$PURPOSES[1:20])
s_grep = grepl("S", dams_sf$PURPOSES[1:20])
h_grep = grepl("H", dams_sf$PURPOSES[1:20])
d_grep = grepl("D", dams_sf$PURPOSES[1:20])
plot_pip = function(sf_obj, title){
ggplot()+
geom_sf(data = sf_obj, aes(fill = log(n)))+
scale_fill_viridis_c()+
theme_void() +
labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))+
gghighlight::gghighlight(n>=(mean(n)+sd(n)))
}
rec = dams_sf %>%
filter(grepl("R", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
s = dams_sf %>%
filter(grepl("S", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
h = dams_sf %>%
filter(grepl("H", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
d = dams_sf %>%
filter(grepl("D", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
rec
s
h
d