# Chunk defaults for the rendered report: echo the code, hide package
# messages and warnings.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Packages used below: tidyverse for the dplyr pipeline and ggplot2 plots,
# wesanderson for wes_palette(), ggforce for geom_sina(), and rcartocolor
# for scale_color_carto_d().
library(tidyverse)
library(wesanderson)
library(ggforce)
library(rcartocolor)

# Load the raw records; the path is relative to the working directory.
hate_crime <- read.csv("hate_crime.csv")
#library(colorspace)
#library(devtools)
#library(dplyr)
#library(forcats)
#library(ggplot2)
#library(ggridges)
#library(ggthemes)
#library(knitr)
#library(leaflet)
#library(leaflet.providers)
#library(maptools)
#library(rcmdcheck)
#library(rmarkdown)
#library(rnaturalearth)
#library(rnaturalearthdata)
#library(rnaturalearthhires)
#library(sf)
#library(skimr)
#library(spatial, lib.loc = "C:/Program Files/R/R-4.2.1/library")
#library(viridis)
#library(tigris)
#library(polyclip)
#library(jsonlite)
#library(labeling)

Prepping and Cleaning the Data

I limited the analysis to the states with the highest percentage of rural population. There was not enough data available for Mississippi, so I removed it and replaced it with South Dakota.

I chose these values to build the visualizations from the most common values in each column. For example, the OFFENSE_NAME values I kept are the 5 most common types of offenses committed.

# Build the analysis subset of `hate_crime`.
#
# Keeps only the columns used by the figures, then restricts the rows to:
#   * six selected states,
#   * five offense types,
#   * five location types,
#   * five bias descriptions (exact matches only),
#   * incidents recorded from 2008 through 2020 inclusive.
#
# `%in%` replaces the chained `==` / `|` tests; it never yields NA, and
# filter() drops NA rows in either form, so the selected rows are the same.
# The year bounds are numeric so the comparison is not performed on strings.
state_offense <- hate_crime %>%
  select(
    BIAS_DESC, VICTIM_COUNT, OFFENSE_NAME, STATE_NAME, OFFENDER_RACE,
    DATA_YEAR, TOTAL_OFFENDER_COUNT, LOCATION_NAME, AGENCY_TYPE_NAME
  ) %>%
  filter(
    STATE_NAME %in% c(
      "Maine", "Vermont", "Arkansas",
      "West Virginia", "South Dakota", "Montana"
    ),
    OFFENSE_NAME %in% c(
      "Aggravated Assault",
      "Intimidation",
      "Murder and Nonnegligent Manslaughter",
      "Robbery",
      "Destruction/Damage/Vandalism of Property"
    ),
    LOCATION_NAME %in% c(
      "Residence/Home",
      "School/College",
      "Church/Synagogue/Temple/Mosque",
      "Parking/Drop Lot/Garage",
      "Highway/Road/Alley/Street/Sidewalk"
    ),
    BIAS_DESC %in% c(
      "Anti-Black or African American",
      "Anti-White",
      "Anti-Jewish",
      "Anti-Hispanic or Latino",
      "Anti-Gay (Male)"
    ),
    DATA_YEAR >= 2008,
    DATA_YEAR <= 2020
  )

I used this command to identify the 7 most common values of a variable (shown here for LOCATION_NAME).

# Tabulate LOCATION_NAME, sort the counts in increasing order, and print the
# names of the last seven entries (the most frequent value comes last).
hate_crime$LOCATION_NAME %>%
  table() %>%
  sort() %>%
  names() %>%
  tail(7)
## [1] "Commercial/Office Building"         "Church/Synagogue/Temple/Mosque"    
## [3] "Parking/Drop Lot/Garage"            "School/College"                    
## [5] "Other/Unknown"                      "Highway/Road/Alley/Street/Sidewalk"
## [7] "Residence/Home"

Figure 1

A bar graph that details the offense type by state, for states where a majority of the population lives in rural areas. Also including California and Virginia for context.

In the first two graphs, I wanted to see which offenses were most common each year, broken down by state. I also split the data into two time periods to compare them. Each period lasts about 6 years, starting with 2008.

# Figure 1 (second period): number of records per year for each offense
# type, 2014 through 2020, with one panel per state.
# The year bounds are numeric so the comparison is not performed on strings.
state_offense %>%
  filter(DATA_YEAR >= 2014, DATA_YEAR <= 2020) %>%
  ggplot(aes(x = DATA_YEAR, fill = OFFENSE_NAME)) +
  # geom_bar() counts rows, so the y axis is a count, not an offense type.
  geom_bar(position = "dodge") +
  labs(
    x = "2014 - 2020",
    y = "Number of Offenses",
    title = "Annual Offense Type Reported by State",
    fill = "Type of Offense"
  ) +
  facet_wrap(~ STATE_NAME)

# Figure 1 (first period): number of records per year for each offense
# type, 2008 through 2013, with one panel per state.
# The year bounds are numeric so the comparison is not performed on strings.
state_offense %>%
  filter(DATA_YEAR >= 2008, DATA_YEAR <= 2013) %>%
  ggplot(aes(x = DATA_YEAR, fill = OFFENSE_NAME)) +
  # geom_bar() counts rows, so the y axis is a count, not an offense type.
  geom_bar(position = "dodge") +
  labs(
    # The x label matches the filtered period (it previously read 2008 - 2020).
    x = "2008 - 2013",
    y = "Number of Offenses",
    title = "Annual Offense Type Reported by State",
    fill = "Type of Offense"
  ) +
  facet_wrap(~ STATE_NAME)

Figure 2

The second graph shows the years in which each bias against a specific race or identified group was associated with the most hate crime events. For example, the graph identifies that in 2012, anti-Black rhetoric was more common while in 2011, Hispanic/Latino individuals experienced more hate crimes on average.

# Figure 2: one jittered, semi-transparent point per record, with bias
# description on the x axis and year on the y axis. The large red point in
# each column marks the median year for that bias description.
state_offense %>%
  ggplot(aes(x = BIAS_DESC, y = DATA_YEAR, color = BIAS_DESC)) +
  geom_point(size = 2, position = "jitter", alpha = 1 / 2) +
  scale_color_manual(values = wes_palette("FantasticFox1")) +
  stat_summary(fun = "median", geom = "point", color = "red", size = 5) +
  labs(
    title = "Frequency of Biases based by Year",
    # Labels follow the aesthetics: x is BIAS_DESC and y is DATA_YEAR
    # (the two axis labels were previously swapped).
    x = "Type of Anti-Bias",
    y = "Year",
    color = "Type of Anti-Bias"
  )

Figure 3

The third graph shows, among the top 5 locations, where hate crimes were most commonly committed and in which year.

# Figure 3: sina plot with one point per record, offense location on the
# x axis and year on the y axis, coloured by location.
state_offense %>%
  ggplot(aes(x = LOCATION_NAME, y = DATA_YEAR, color = LOCATION_NAME)) +
  geom_sina(size = 2) +
  scale_color_carto_d(palette = "Vivid") +
  theme_minimal() +
  labs(
    title = "Common Offense Location by Year",
    x = "Location of Offense",
    y = "Year",
    color = "Location of Offense Legend"
  )