Beginning in approximately 1970, the National UFO Reporting Center (NUFORC) began accepting alleged sightings of unidentified flying objects from around the world. This brief analysis will offer some basic statistics of the past decade’s sightings.
:
ufo_data <- readRDS('ufo_data.RDS')
# scrape ufo sighting data
library(tidyverse)
#install.packages("rvest")
library(rvest)
library(extrafont)
library(remotes)
#remotes::install_version("Rttf2pt1", version = "1.3.8")
#extrafont::font_import()
loadfonts()
library(ggthemes)
library(viridis)
library(tigris)
library(sf)
library(tidycensus)
library(wordcloud)
library(tm)
#install.packages("wordcloud2")
library(wordcloud2)
#install.packages("leaflet")
library(leaflet)
library(lubridate)
It would appear our otherworldly friends have an aversion to all things un-American. Go USA, I suppose.
#making initial plots
#but first a custom palette
Aliens <- c("#496800", "#527500", "#5b8200", "#679300", "#72a300", "#81b800", "#8fcc00", "#9bdd00", "#a7ee00", "#b3ff00")
a <- ufo_data %>%
filter(CountryName != "",
CountryName != "USA") %>%
group_by(CountryName) %>%
summarise(TotalSightings = n()) %>%
filter(TotalSightings > 10) %>% arrange(desc(TotalSightings))
#plot for nonUS sightings
a %>%
slice_max(TotalSightings, n = 10) %>%
ggplot(aes(x = reorder(CountryName, -TotalSightings), y = TotalSightings)) +
geom_col(fill = Aliens, color = "white", alpha = 0.75) +
geom_text(aes(label = TotalSightings, vjust = -0.75))+
labs(title = "Top 10 non-US Countries for UFO Sightings",
subtitle = "2012-2022",
x = "",
y = "Total Sightings",
caption = "Chris Barber; 9/18/2022; Source: NUFORC")+
theme_fivethirtyeight()+
theme(text = element_text(family = "Courier New"),
axis.text = element_text(size = 10))
Understandably, aliens are infatuated with Hollywood and ‘Florida Man’, but is this just a population chart? We’ll return to this later.
#plot for US states
state_sightings <- ufo_data %>%
filter(CountryName == "USA",
!is.na(CountryName),
!is.na(State)) %>%
group_by(State) %>%
summarise(TotalSightings = n()) %>% arrange(desc(TotalSightings))
state_sightings %>% slice_max(TotalSightings, n = 10) %>%
ggplot(aes(x = reorder(State, -TotalSightings), y = TotalSightings)) +
geom_col(fill = Aliens, color = "white", alpha = 0.75) +
geom_text(aes(label = TotalSightings, vjust = -0.75))+
theme_fivethirtyeight()+
theme(text = element_text(family = "Courier New"),
axis.text = element_text(size = 10))+
labs(title = "Top 10 US States for UFO Sightings",
subtitle = "2012-2022",
x = "",
y = "Total Sightings",
caption = "Chris Barber; 9/18/2022; Source: NUFORC") #just a population measure?!
## When have UFO sightings been most common in the past decade?
What was happening in 2014 that so many UFOs manifested before us? That was the year Malaysia Airlines Flight 370 disappeared. Perhaps there is a connection…
sightings_year <- ufo_data %>%
filter(CountryName == "USA",
!is.na(Year)) %>%
group_by(Year) %>%
summarise(YearlySightings = n())
sightings_year %>%
ggplot(aes(Year, YearlySightings)) +
geom_segment(aes(x = Year, xend = Year, y = 0, yend = YearlySightings)) +
geom_point(color = "#81b800", size = 5.5)+
theme_fivethirtyeight() +
labs(title = "Global UFO Sightings by Year",
subtitle = "2012 - 2022",
x = "",
y = "Total Yearly Sightings",
caption = "Chris Barber 9/18/2022; Source: NUFORC")+
theme(text = element_text(family = "Courier New"))
Further reinforcing the patriotic mentality of aliens, July is the most popular month for sightings. Although, one wonders if this is simply a visual trick resulting from too much cheap beer and fireworks.
#montlysightings
monthly_sightings <- ufo_data %>% group_by(Month) %>%
summarise(MonthlySightings = n()) %>%
mutate(Month = as.numeric(Month)) %>% arrange(Month)
monthly_sightings <- monthly_sightings %>% mutate(Month = factor(Month))
monthly_sightings <- monthly_sightings %>% mutate(Month = factor(Month,
levels = c(1,2,3,4,5,6,7,8,9,10,11,12),
labels = c("January","February","March","April","May","June",
"July","August","September","October","November","December")))
monthly_sightings %>%
ggplot(aes(x = fct_rev(Month), y = MonthlySightings))+ #want to reverse the month order
geom_segment(aes(x = Month, xend = Month, y = 0, yend = MonthlySightings))+
geom_point(color = "#81b800", size = 5.5)+
theme_fivethirtyeight()+
labs(title = "Global UFO Sightings by Month",
subtitle = "2012 - 2022",
x = "",
y = "Total Monthly Sightings",
caption = "Chris Barber; 9/21/2022; Source: NUFORC")+
theme(text = element_text(family = "Courier New"))+
coord_flip()
9:00pm is the most popular time of day for UFOs to appear. They couldn’t make it too obvious by appearing during the day, naturally.
#across a day sightings plot
ufo_data %>%
mutate(RealTime = parse_time(RealTime, format = '%H:%M:%S')) %>%
ggplot(aes(x = RealTime)) +
geom_density(color = '#496800', fill = '#527500', alpha = 0.5) +
theme_fivethirtyeight()+
scale_x_time(breaks = hms(c('00:00:00','06:00:00','12:00:00','18:00:00','23:59:59')),
labels = c('Midnight','6:00AM','Noon','6:00PM','11:59PM'))+
theme(axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
text = element_text(family = 'Courier New'))+
labs(title = 'Distribution of UFO Sightings across 24 Hours',
subtitle = '2012-2022',
caption = 'Chris Barber; 9/30/2022; Source: NUFORC')
According to the 24 categories NUFORC offers eye-witnesses, ‘light’ overwhelmingly wins.
#plots for shape
UFO_shape <- ufo_data %>%
group_by(Shape) %>%
summarise(Occurences = n())
ufo_data %>%
filter(Shape != "") %>%
mutate(Shape = factor(Shape)) %>% #I want to reverse the shape order
ggplot(aes(fct_infreq(Shape)))+
geom_histogram(stat = "count", fill = viridis(n = 23,option = "plasma"))+
theme_fivethirtyeight()+
labs(title = "Global Occurences of UFO Shapes",
subtitle = "2012 - 2022",
caption = "Chris Barber; 9/22/2022; Source: NUFORC")+
theme(text = element_text(family = "Courier New"))+
coord_flip()
NUFORC gives users the ability to give an open-ended description of their experience.
LatterHalf <- ufo_data %>% filter(Year == "2018"|Year == "2019"|Year == "2020"|Year == "2021"|Year == "2022")
UFO_experience <- Corpus(VectorSource(LatterHalf$Summary))
UFO_experience <- UFO_experience %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation)
UFO_experience <- tm_map(UFO_experience, content_transformer(tolower))
UFO_experience <- tm_map(UFO_experience, removeWords, stopwords("english"))
dtm <- TermDocumentMatrix(UFO_experience)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df <- data.frame(word = names(words),freq=words)
wordcloud(words = df$word, freq = df$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,
colors= viridis(8))
NUFORC is very thorough in its examination of UFO sightings. To discern the lies from the truth, the experts at NUFORC will occasionally mark certain reports as a ‘hoax’. There is no explanation of their methodology. Although from the chart below, we can see which states are the biggest culprits and in which season they like to fib.
tmp <- ufo_data %>%
mutate(hoax = ifelse(str_detect(Summary, "^\\(\\(HOAX"),
"hoax", "nonhoax"))
hoaxes <- tmp %>% filter(hoax == "hoax")
hoaxes <- hoaxes %>%
filter(Country == 'USA',
!is.na(StateName))
sightings_byseason <- hoaxes %>%
group_by(StateName, Season) %>%
summarise(TotalHoaxes = n())
sightings_byseason %>%
ggplot(aes(x = StateName, y = Season, size = TotalHoaxes, color = Season)) +
scale_color_manual(values = c('goldenrod3','chartreuse','firebrick3','darkblue'))+
geom_point(alpha = 0.5) +
facet_wrap(~StateName)+
theme(axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
text = element_text(family = 'Courier New'))+
labs(title = "'Hoaxes' across states & seasons",
subtitle = '2012 - 2022',
color = "",
caption = 'Chris Barber; 9/30/2022; Source: NUFORC')
Using state population data from US Census Bureau’s American Community Survey, we can generate a more accurate assessments of where UFOs most commonly appear in the US. From these two maps, it is clear they prefer the Pacific Northwest and New England. Alas, where the least light pollution.
#creating choropleths after adjusting for population
state_pop <- get_acs(geography = "state",
variables = "B01001_001",
year = 2015)
load_variables(year = 2015, dataset = "acs5") %>% view()
states <- states(cb = TRUE)
st_crs(states) # NAD83
not48 <- c("02", "15", "60", "66", "69", "72", "78")
states48 <- states %>%
filter(!(GEOID %in% not48))
state_sightings <- state_sightings %>%
mutate(STUSPS = State) %>% select(STUSPS, TotalSightings)
UFO_pop <- inner_join(states48, state_pop, by = "GEOID")
UFO_pop <- UFO_pop %>% inner_join(state_sightings, by = "STUSPS")
UFO_pop <- UFO_pop %>% mutate(UFO_pct = TotalSightings/estimate) %>%
mutate(UFO_pct = UFO_pct*1000)
UFO_pop <- UFO_pop %>% mutate(UFO_pct = round(UFO_pct, 4))
#basic choropleth
UFO_pop %>%
ggplot(aes(fill = UFO_pct)) +
geom_sf(color = "white") +
scale_fill_viridis(option = "plasma") + #i want to use the Aliens palette but don't know how to specify the number of colors
theme_void()+
labs(title = "US UFO Sightings Per 1k People",
subtitle = "2012 - 2022",
caption = "Chris Barber; 9/19/2022; Source: NUFORC, US Census Bureau",
fill = "UFO Sightings")+
theme(text = element_text(family = "Courier New"))
#leaflet
UFO_pop <- st_transform(UFO_pop, 4326)
pal <- colorNumeric("inferno", domain = UFO_pop$UFO_pct)
leaflet() %>%
addProviderTiles("CartoDB.DarkMatterNoLabels") %>%
addPolygons(data = UFO_pop,
fillColor = ~pal(UFO_pct),
fillOpacity = 0.75,
weight = 1,
opacity = 1,
color = "gray70",
highlight = highlightOptions(
weight = 2,
fillOpacity = 0.75,
bringToFront = T),
popup = paste0("State: ", UFO_pop$NAME.x, "<br>",
"Sightings Per 1k People: ", UFO_pop$UFO_pct, 2)) %>%
addLegend("bottomright", pal = pal, values = UFO_pop$UFO_pct,
title = "Sightings per 1k People", opacity = 0.75)