You too can be an #electionscientist! R Code for processing the Texas early voting returns

The Texas returns are pretty easy to deal with because they are reported in one place, and the webpage presents the results in a well-formatted table.

The code below requires the tidyverse (which includes ggplot2 and dplyr) and rvest packages for R to be installed.

 

library(tidyverse)

library(rvest)

# Dates to scrape. Each value is the page name that gets pasted into the
# per-day URL further down (".../earlyvoting/<year>/<date>.shtml").
# NOTE: string literals must use straight quotes ("), not typographic quotes;
# R cannot parse the curly variants.
dates18 <- paste0("oct", 22:31) # "oct22" ... "oct31"
dates14 <- paste0("oct", 20:31) # "oct20" ... "oct31"

# Inspect the website to see how its columns are laid out, then create an
# empty data frame (zero rows, every column character) to collect the selected
# dates into. The column names are your choice.
texas18 <- data.frame(
  County                   = character(0),
  RegVoters                = character(0),
  InPerson                 = character(0),
  CumulativeInPerson       = character(0),
  CumulativePctInPerson    = character(0),
  CumulativeByMail         = character(0),
  CumulativePctByMail      = character(0),
  CumulativeInPersonByMail = character(0),
  CumulativeTurnoutPercent = character(0),
  DataDate                 = character(0),
  stringsAsFactors = FALSE
)

# Empty 2014 accumulator: zero rows, every column character. It has one column
# fewer than the 2018 frame because the 2014 data has no CumulativePctInPerson.
texas14 <- data.frame(
  County                   = character(0),
  RegVoters                = character(0),
  InPerson                 = character(0),
  CumulativeInPerson       = character(0),
  # CumulativePctInPerson  = character(0), # Not in 2014 data
  CumulativeByMail         = character(0),
  CumulativePctByMail      = character(0),
  CumulativeInPersonByMail = character(0),
  CumulativeTurnoutPercent = character(0),
  DataDate                 = character(0),
  stringsAsFactors = FALSE
)

# Next, go through the 2018 dates, pull the table down for each one, and append
# all of them to the texas18 data frame.
#
# Each day's table is built independently and everything is bound in a single
# bind_rows() call, rather than growing the data frame on every iteration.
texas18_days <- lapply(dates18, function(day) {
  url <- paste0("https://www.sos.state.tx.us/elections/earlyvoting/2018/", day, ".shtml")

  # read_html() and html_table() are from rvest
  day_data <- data.frame(html_table(read_html(url)))

  # Tag every row with the date it was scraped for, then apply our own names
  # (9 table columns + the date column we just added).
  day_data$date <- day
  if (ncol(day_data) != 10L) {
    stop("Unexpected number of columns (", ncol(day_data), ") in the 2018 table for ",
         day, call. = FALSE)
  }
  colnames(day_data) <- c("County", "RegVoters", "InPerson", "CumulativeInPerson", "CumulativePctInPerson", "CumulativeByMail", "CumulativePctByMail", "CumulativeInPersonByMail", "CumulativeTurnoutPercent", "DataDate")

  # html_table() may auto-convert a column to a number when none of its values
  # contain commas or percent signs. texas18 holds character columns, and
  # bind_rows() refuses to combine character with numeric, so keep everything
  # as text here; the numeric conversion happens in the cleaning step.
  day_data[] <- lapply(day_data, as.character)
  day_data
})

texas18 <- bind_rows(texas18, texas18_days)

# Next, clean the data: remove the commas and percent symbols, then convert the
# columns to numeric values we can manipulate. Columns 2 to 9 are the count and
# percentage columns; column 1 (County) and column 10 (DataDate) stay as text.
# Note the two gsub() steps could be collapsed into one, but they are broken
# out here to show the effect of each. If you use this a lot, it would be wise
# to write a function so you avoid copy-and-paste errors!

# Remove commas...
texas18[2:9] <- lapply(texas18[2:9], function(x) gsub(",", "", x))

# Remove percentage symbols...
texas18[2:9] <- lapply(texas18[2:9], function(x) gsub("%", "", x))

# Convert to numbers
texas18[2:9] <- lapply(texas18[2:9], as.numeric)

# Next, go through the 2014 dates, pull the table down for each one, and append
# all of them to the texas14 data frame.
#
# Each day's table is built independently and everything is bound in a single
# bind_rows() call, rather than growing the data frame on every iteration.
texas14_days <- lapply(dates14, function(day) {
  url <- paste0("https://www.sos.state.tx.us/elections/earlyvoting/2014/", day, ".shtml")

  # read_html() and html_table() are from rvest
  day_data <- data.frame(html_table(read_html(url)))

  # Tag every row with the date it was scraped for, then apply our own names
  # (8 table columns + the date column we just added; the 2014 pages have no
  # CumulativePctInPerson column).
  day_data$date <- day
  if (ncol(day_data) != 9L) {
    stop("Unexpected number of columns (", ncol(day_data), ") in the 2014 table for ",
         day, call. = FALSE)
  }
  colnames(day_data) <- c("County", "RegVoters", "InPerson", "CumulativeInPerson", "CumulativeByMail", "CumulativePctByMail", "CumulativeInPersonByMail", "CumulativeTurnoutPercent", "DataDate")

  # html_table() may auto-convert a column to a number when none of its values
  # contain commas or percent signs. texas14 holds character columns, and
  # bind_rows() refuses to combine character with numeric, so keep everything
  # as text here; the numeric conversion happens in the cleaning step.
  day_data[] <- lapply(day_data, as.character)
  day_data
})

texas14 <- bind_rows(texas14, texas14_days)

# Next, clean the data: remove the commas and percent symbols, then convert the
# columns to numeric values we can manipulate. Columns 2 to 8 are the count and
# percentage columns; column 1 (County) and column 9 (DataDate) stay as text.
# Note the two gsub() steps could be collapsed into one, but they are broken
# out here to show the effect of each. If you use this a lot, it would be wise
# to write a function so you avoid copy-and-paste errors!

# Remove commas...
texas14[2:8] <- lapply(texas14[2:8], function(x) gsub(",", "", x))

# Remove percentage symbols...
texas14[2:8] <- lapply(texas14[2:8], function(x) gsub("%", "", x))

# Convert to numbers
texas14[2:8] <- lapply(texas14[2:8], as.numeric)

# Grab the 2014 early voting figures for the 2018 comparisons. Note we keep
# only the final day of data for 2014.
texas14_final <- texas14 %>%
  filter(DataDate == "oct31")

# Attach the 2014 final figures to every 2018 row for the same county. Columns
# present in both tables get the suffix .x (2018) or .y (2014 final).
# Counties without a 2014 match get NA in the .y columns.
texas1814 <- texas18 %>%
  left_join(
    select(texas14_final, County, CumulativeInPersonByMail, CumulativeTurnoutPercent),
    by = c("County" = "County")
  ) %>%
  # pctof14 is plotted below but was never defined. Going by the plot labels
  # ("2018 Early Voting Totals as a Percent of 2014 Final Total"), it is the
  # 2018 cumulative in-person + mail votes divided by the 2014 final total.
  # NOTE(review): confirm this is the intended ratio.
  mutate(pctof14 = CumulativeInPersonByMail.x / CumulativeInPersonByMail.y)

# Now our plots

# Plot 1: 2018 cumulative turnout percentage by date, one line per county.
ggplot(
  texas1814,
  aes(y = CumulativeTurnoutPercent.x, x = DataDate, group = County, color = County)
) +
  geom_line() +
  labs(
    title = "Early Voting Totals by County Over Time",
    x = "Date",
    y = "2018 Early Voting Totals"
  )

# Plot 2: 2018 cumulative votes as a share of the 2014 final early vote, one
# line per county, with the "Total" row drawn on top as a thick black line.
ggplot(
  filter(texas1814, County != "Total"),
  aes(y = pctof14, x = DataDate, group = County, color = County)
) +
  geom_line() +
  labs(
    title = "Early Voting Turnout As a Percent of 2014 Final Early Votes for the 30 Largest Texas Counties",
    x = "Date",
    y = "2018 Early Voting Totals as a Percent of 2014 Final Total"
  ) +
  # Reference line at 100% of the 2014 final total
  geom_hline(yintercept = 1) +
  # percent() lives in the scales package, which is not attached by the
  # library() calls above, so it is namespace-qualified here.
  scale_y_continuous(labels = scales::percent) +
  geom_line(
    data = filter(texas1814, County == "Total"),
    aes(y = pctof14, x = DataDate),
    size = 1.5, # on ggplot2 >= 3.4 the preferred spelling is linewidth = 1.5
    color = "black"
  ) +
  # annotate() draws the label once; geom_text() with a constant label would
  # draw one copy per data row, all stacked on top of each other.
  annotate("text", label = "30 County Total", x = 8.6, y = 2.25, color = "black") +
  theme_light()