# Chunk defaults for the rendered report: show the code, but hide package
# messages and warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
library(tidyverse)
library(wesanderson)
library(ggforce)
library(rcartocolor)
# Load the raw data from the CSV file in the working directory.
hate_crime <- read.csv(file = "hate_crime.csv")
#library(colorspace)
#library(devtools)
#library(dplyr)
#library(forcats)
#library(ggplot2)
#library(ggridges)
#library(ggthemes)
#library(knitr)
#library(leaflet)
#library(leaflet.providers)
#library(maptools)
#library(rcmdcheck)
#library(rmarkdown)
#library(rnaturalearth)
#library(rnaturalearthdata)
#library(rnaturalearthhires)
#library(sf)
#library(skimr)
#library(spatial, lib.loc = "C:/Program Files/R/R-4.2.1/library")
#library(viridis)
#library(tigris)
#library(polyclip)
#library(jsonlite)
#library(labeling)
I limited the analysis to the states with the highest percentage of rural population. Not enough data was available for Mississippi, so I removed it and replaced it with South Dakota.
Within each column, I kept only the most common values to build the visualizations. For example, the OFFENSE_NAME values I chose were the 5 most common types of offenses committed.
# Build the analysis subset of `hate_crime`: keep only the columns used in the
# plots below, then restrict the rows to the selected states, offense types,
# locations, bias categories and reporting years.
state_offense <- hate_crime %>%
  select(
    BIAS_DESC, VICTIM_COUNT, OFFENSE_NAME, STATE_NAME, OFFENDER_RACE,
    DATA_YEAR, TOTAL_OFFENDER_COUNT, LOCATION_NAME, AGENCY_TYPE_NAME
  ) %>%
  # `%in%` replaces the long `==` / `|` chains; rows with a missing value are
  # dropped either way.
  filter(
    STATE_NAME %in% c(
      "Maine", "Vermont", "Arkansas", "West Virginia", "South Dakota",
      "Montana"
    )
  ) %>%
  filter(
    OFFENSE_NAME %in% c(
      "Aggravated Assault", "Intimidation",
      "Murder and Nonnegligent Manslaughter", "Robbery",
      "Destruction/Damage/Vandalism of Property"
    )
  ) %>%
  filter(
    LOCATION_NAME %in% c(
      "Residence/Home", "School/College", "Church/Synagogue/Temple/Mosque",
      "Parking/Drop Lot/Garage", "Highway/Road/Alley/Street/Sidewalk"
    )
  ) %>%
  filter(
    BIAS_DESC %in% c(
      "Anti-Black or African American", "Anti-White", "Anti-Jewish",
      "Anti-Hispanic or Latino", "Anti-Gay (Male)"
    )
  ) %>%
  # Compare the year against numbers: comparing against "2008"/"2020" makes R
  # compare as text, which is only correct while every year has four digits.
  filter(DATA_YEAR >= 2008, DATA_YEAR <= 2020)
I used this command to identify the 7 most common values of a variable (here, LOCATION_NAME).
# Tabulate the location names, sort the counts in ascending order, and print
# the names of the last 7 entries (i.e. the 7 most frequent locations).
hate_crime[["LOCATION_NAME"]] %>%
  table() %>%
  sort() %>%
  names() %>%
  tail(n = 7)
## [1] "Commercial/Office Building" "Church/Synagogue/Temple/Mosque"
## [3] "Parking/Drop Lot/Garage" "School/College"
## [5] "Other/Unknown" "Highway/Road/Alley/Street/Sidewalk"
## [7] "Residence/Home"
A bar graph that details the offense type by state, for states where a majority of the population lives in rural areas. Also including California and Virginia for context.
In the first two graphs, I wanted to see which offenses were more common each year, broken down by state. I also split the data into two time periods to compare the difference. Each period lasted about 6 years, starting with 2008.
# Offense counts per year for 2014-2020, one panel per state.
# geom_bar() counts the rows in each year/offense group, so the y axis is a
# count of records, not an offense type; the label reflects that.
state_offense %>%
  # Numeric bounds avoid comparing the year as text.
  filter(DATA_YEAR >= 2014, DATA_YEAR <= 2020) %>%
  ggplot(aes(x = DATA_YEAR, fill = OFFENSE_NAME)) +
  geom_bar(position = "dodge") +
  labs(
    x = "2014 - 2020",
    y = "Number of Reported Offenses",
    title = "Annual Offense Type Reported by State",
    fill = "Type of Offense"
  ) +
  facet_wrap(~ STATE_NAME)
# Offense counts per year for 2008-2013, one panel per state.
# The x label names the period actually kept by the filter (2008-2013), and
# the y label describes the bar height, which geom_bar() computes as a count.
state_offense %>%
  # Numeric bounds avoid comparing the year as text.
  filter(DATA_YEAR >= 2008, DATA_YEAR <= 2013) %>%
  ggplot(aes(x = DATA_YEAR, fill = OFFENSE_NAME)) +
  geom_bar(position = "dodge") +
  labs(
    x = "2008 - 2013",
    y = "Number of Reported Offenses",
    title = "Annual Offense Type Reported by State",
    fill = "Type of Offense"
  ) +
  facet_wrap(~ STATE_NAME)
The second graph shows in which years each bias against a specific race or identified group was associated with the greatest number of hate crime events. For example, the graph indicates that anti-Black incidents were more common in 2012, while Hispanic/Latino individuals experienced more hate crimes on average in 2011.
# Jittered points of reporting year for each bias category, with the median
# year of each category overlaid as a large red point.
# The axis labels follow the mappings: x is BIAS_DESC, y is DATA_YEAR
# (they were previously swapped).
state_offense %>%
  ggplot(aes(x = BIAS_DESC, y = DATA_YEAR, color = BIAS_DESC)) +
  geom_point(size = 2, position = "jitter", alpha = 1 / 2) +
  scale_color_manual(values = wes_palette("FantasticFox1")) +
  stat_summary(fun = "median", geom = "point", color = "red", size = 5) +
  labs(
    title = "Frequency of Biases based by Year",
    x = "Type of Anti-Bias",
    y = "Year",
    color = "Type of Anti-Bias"
  )
The third graph identifies, among the top 5 locations, where hate crimes were most commonly committed and in what year.
# Sina plot of reporting year for each offense location, colored by location
# with the CARTO "Vivid" qualitative palette.
ggplot(
  state_offense,
  aes(x = LOCATION_NAME, y = DATA_YEAR, color = LOCATION_NAME)
) +
  geom_sina(size = 2) +
  scale_color_carto_d(palette = "Vivid") +
  theme_minimal() +
  labs(
    title = "Common Offense Location by Year",
    x = "Location of Offense",
    y = "Year",
    color = "Location of Offense Legend"
  )