").append(m.parseHTML(a)).find(d):a)}).complete(c&&function(a,b){g.each(c,e||[a.responseText,b,a])}),this},m.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(a,b){m.fn[b]=function(a){return this.on(b,a)}}),m.expr.filters.animated=function(a){return m.grep(m.timers,function(b){return a===b.elem}).length};var cc=a.document.documentElement;function dc(a){return m.isWindow(a)?a:9===a.nodeType?a.defaultView||a.parentWindow:!1}m.offset={setOffset:function(a,b,c){var d,e,f,g,h,i,j,k=m.css(a,"position"),l=m(a),n={};"static"===k&&(a.style.position="relative"),h=l.offset(),f=m.css(a,"top"),i=m.css(a,"left"),j=("absolute"===k||"fixed"===k)&&m.inArray("auto",[f,i])>-1,j?(d=l.position(),g=d.top,e=d.left):(g=parseFloat(f)||0,e=parseFloat(i)||0),m.isFunction(b)&&(b=b.call(a,c,h)),null!=b.top&&(n.top=b.top-h.top+g),null!=b.left&&(n.left=b.left-h.left+e),"using"in b?b.using.call(a,n):l.css(n)}},m.fn.extend({offset:function(a){if(arguments.length)return void 0===a?this:this.each(function(b){m.offset.setOffset(this,a,b)});var b,c,d={top:0,left:0},e=this[0],f=e&&e.ownerDocument;if(f)return b=f.documentElement,m.contains(b,e)?(typeof e.getBoundingClientRect!==K&&(d=e.getBoundingClientRect()),c=dc(f),{top:d.top+(c.pageYOffset||b.scrollTop)-(b.clientTop||0),left:d.left+(c.pageXOffset||b.scrollLeft)-(b.clientLeft||0)}):d},position:function(){if(this[0]){var a,b,c={top:0,left:0},d=this[0];return"fixed"===m.css(d,"position")?b=d.getBoundingClientRect():(a=this.offsetParent(),b=this.offset(),m.nodeName(a[0],"html")||(c=a.offset()),c.top+=m.css(a[0],"borderTopWidth",!0),c.left+=m.css(a[0],"borderLeftWidth",!0)),{top:b.top-c.top-m.css(d,"marginTop",!0),left:b.left-c.left-m.css(d,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||cc;while(a&&!m.nodeName(a,"html")&&"static"===m.css(a,"position"))a=a.offsetParent;return a||cc})}}),m.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(a,b){var 
c=/Y/.test(b);m.fn[a]=function(d){return V(this,function(a,d,e){var f=dc(a);return void 0===e?f?b in f?f[b]:f.document.documentElement[d]:a[d]:void(f?f.scrollTo(c?m(f).scrollLeft():e,c?e:m(f).scrollTop()):a[d]=e)},a,d,arguments.length,null)}}),m.each(["top","left"],function(a,b){m.cssHooks[b]=La(k.pixelPosition,function(a,c){return c?(c=Ja(a,b),Ha.test(c)?m(a).position()[b]+"px":c):void 0})}),m.each({Height:"height",Width:"width"},function(a,b){m.each({padding:"inner"+a,content:b,"":"outer"+a},function(c,d){m.fn[d]=function(d,e){var f=arguments.length&&(c||"boolean"!=typeof d),g=c||(d===!0||e===!0?"margin":"border");return V(this,function(b,c,d){var e;return m.isWindow(b)?b.document.documentElement["client"+a]:9===b.nodeType?(e=b.documentElement,Math.max(b.body["scroll"+a],e["scroll"+a],b.body["offset"+a],e["offset"+a],e["client"+a])):void 0===d?m.css(b,c,g):m.style(b,c,d,g)},b,f?d:void 0,f,null)}})}),m.fn.size=function(){return this.length},m.fn.andSelf=m.fn.addBack,"function"==typeof define&&define.amd&&define("jquery",[],function(){return m});var ec=a.jQuery,fc=a.$;return m.noConflict=function(b){return a.$===m&&(a.$=fc),b&&a.jQuery===m&&(a.jQuery=ec),m},typeof b===K&&(a.jQuery=a.$=m),m});
For #tidytuesday we’re looking at Amusement Park injuries. I plan on making a simple visual of the number of injuries by month.
Libraries
# Load pacman, installing it first when it is missing. require() returns FALSE
# without attaching the package when it is absent, so pacman has to be
# attached explicitly after installation; otherwise p_load() is not found.
if (!require(pacman)) {
  install.packages('pacman')
  library(pacman)
}
# Attach the packages used by the rest of the analysis.
p_load(janitor, skimr, stringr, tidyverse, lubridate)
Import Data
Previous inspection of the raw data shows that some NA values are denoted by other strings, such as “n/a” or “#########”. These are not picked up as NA by default, so they must be listed manually.
# Split over multiple lines for legibility
data_url <- paste0("https://raw.githubusercontent.com/rfordatascience/",
                   "tidytuesday/master/data/2019/2019-09-10/",
                   "tx_injuries.csv")
# Define observed N/A types. Supplying `na` replaces readr's default of
# c("", "NA") instead of extending it, so the empty string is listed
# explicitly to keep blank cells as missing values.
na_list <- c("", "NA", "n/a", "#########", "N/A", "na")
# Import Data
tx_injuries <- readr::read_csv(file = data_url, na = na_list)
Data Cleaning / Preparation
Date Correction
There are two date formats used in the data set. One has an “M/D/Y” format. The other is represented as a serial number. Both are stored as character strings. To convert the dates to a consistent format and a date object, the following steps were taken.
- Drop all missing dates.
- Use an if/else statement to determine which date format is being processed.
- For the “M/D/Y” dates use the mdy() function from lubridate to convert to a date object. Save the date object in a new column using mutate.
- Convert the serial date strings to numbers with as.numeric(), then use excel_numeric_to_date() from the janitor package to convert them to date objects. Save the date objects using mutate, in the same new column as the other date format from Step 3.
# Consolidate Date Types / Drop Missing Dates
tx_injuries <- tx_injuries %>%
  # Drop N/A Injury dates
  drop_na(injury_date) %>%
  # Unify date type. if_else() evaluates both branches for every row, so each
  # parser also sees the values written in the other format. The parse and
  # coercion warnings this would raise are expected and are silenced; those
  # values become NA in the branch that is not selected for that row.
  # Values that match neither format end up as NA in the new column.
  mutate(injury_date_conv = if_else(
    # Check if date uses "/" (matched literally, not as a regex)
    grepl(pattern = "/", x = injury_date, fixed = TRUE),
    # Converts M/D/Y dates
    mdy(injury_date, quiet = TRUE),
    # Converts Serial dates
    excel_numeric_to_date(suppressWarnings(as.numeric(injury_date)),
                          date_system = "modern")
  ))
Injuries By Month
Now that each injury date is stored as a date object in a new column, we count the number of injuries in each month, using group_by with both year and month. For the final visual a dummy day column is added, with a value of 1. This day column is used to create another date object: a string is generated by concatenating the year, month, and day columns into a single new column, and this full date string is then converted into a date object using the ymd() function from lubridate.
# Data Frame Development
tx_injuries <- tx_injuries %>%
  # Dates that could not be parsed have no month to be counted under; keeping
  # them would create an "NA-NA-1" bucket that ymd() cannot parse
  filter(!is.na(injury_date_conv)) %>%
  mutate(month = month(injury_date_conv),
         year = year(injury_date_conv)) %>%
  group_by(year, month) %>%
  # One row per year/month with the number of injuries recorded
  summarise(injuries = n()) %>%
  # summarise() only removes the last grouping level (month); drop the
  # remaining year grouping so later steps work on a plain data frame
  ungroup() %>%
  # Dummy day of 1 so each month can be represented as a full date
  mutate(day = 1,
         eff_date_char = paste(year, month, day, sep = "-"),
         eff_date = ymd(eff_date_char)) %>%
  select(-eff_date_char)
Visual
Now the injuries recorded each month can be plotted. There is clear seasonal activity, which probably tracks total visits.
# Visual: column chart of the monthly injury counts
ggplot(tx_injuries, aes(x = eff_date, y = injuries)) +
  geom_col(fill = "#1F618D", alpha = 0.75) +
  # One break per year, labelled with the four-digit year
  scale_x_date(breaks = "1 year", date_labels = "%Y") +
  # Title, caption and both axis labels set in a single labs() call
  labs(
    title = "Number of Injuries at Amusement Parks, By Month",
    caption = "Data by Data.world | #TidyTuesday",
    x = "Year",
    y = "Injuries"
  ) +
  theme_minimal() +
  # Negative hjust shifts each year label to the right of its tick mark
  theme(axis.text.x = element_text(hjust = -1.6))