A dashboard of forecasts for the 2020 United States presidential election.
Visual can be found here: https://public.tableau.com/profile/michael.dunphy8764#!/vizhome/2020Elections-USPresident/MainDashboard?publish=yes
Data Source: https://en.wikipedia.org/wiki/2020_United_States_presidential_election
Below is the Python code used to collect and clean the data behind the Tableau visual.
!pip install lxml
!pip install html5lib
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Access the Wiki page with forecasting data from Cook, IE, Sabato, Politico, RCP, Niskanen,
# CNN, The Economist, CBS News, 270toWin, ABC News, NPR, NBC News, 538, and CNalysis
r = requests.get('https://en.wikipedia.org/wiki/2020_United_States_presidential_election')
root = BeautifulSoup(r.content, "lxml")
table = root.find('table', class_="wikitable sortable")
ratings = pd.read_html(table.prettify())[0]
# Ratings will be the main data set cleaned and used for the visual
ratings.head()
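# Optional one-step alternative (a sketch, not what runs above): pandas can
# locate the table by its HTML attributes directly, skipping BeautifulSoup.
# Like the find() call above, this grabs the first "wikitable sortable" table.
# tables = pd.read_html('https://en.wikipedia.org/wiki/2020_United_States_presidential_election',
#                       attrs={"class": "wikitable sortable"})
# ratings = tables[0]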
# Table of the dates each forecaster's ratings were last updated on Wiki
# (pulled from the column headers)
updated = pd.DataFrame(ratings.columns.values[4:])
updated.to_csv("updated.csv", index=False)
updated.head()
# Data Cleaning
ratings.columns = ["State", "Electoral Votes", "PVI", "2016 result", "Average Ratings", "Cook", "IE", "Sabato",
"Politico", "RCP", "Niskanen", "CNN", "The Economist", "CBS", "270toWin", "ABC", "NPR", "NBC",
"538", "CNalysis"]
# Drop the table's last row, which isn't a state
ratings.drop(ratings.index[-1], inplace=True)
ratings.head()
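# Cheap sanity check (the Wiki table layout can change between scrapes):
# every remaining row should name a state
assert ratings["State"].notna().all()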
# Counts how many of the 15 forecasters (columns 5-19) give the state this rating
def get_count(row, prediction):
    count = 0
    column_names = ratings.columns.values[5:20]  # Cook through CNalysis
    for col in column_names:
        # Guard against missing cells, then match the rating as a substring
        if isinstance(row[col], str) and prediction in row[col]:
            count += 1
    return count
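# Example usage (illustrative row choice): how many forecasters
# rate the first state in the table a tossup
get_count(ratings.iloc[0], "Tossup")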
# Returns the consensus rating for a state: the grouping column (added below)
# with the highest forecaster count, along with that count
def get_consensus(row):
    m = 0
    prediction = None
    column_names = ratings.columns.values[20:29]  # the nine grouping columns
    for col in column_names:
        if row[col] >= m:
            m = row[col]
            prediction = col
    return prediction, m
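# Note: with ">=", ties go to whichever grouping column is checked last.
# An order-independent sketch using pandas instead (assumes the nine grouping
# columns added below already exist; idxmax breaks ties toward the first column):
# group_cols = ["Safe R", "Likely R", "Lean R", "Tilt R", "Tossup",
#               "Tilt D", "Lean D", "Likely D", "Safe D"]
# prediction, m = row[group_cols].idxmax(), row[group_cols].max()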
# For each state, count how many forecasters fall into each rating group
safe_r = []
likely_r = []
lean_r = []
tilt_r = []
toss = []
tilt_d = []
lean_d = []
likely_d = []
safe_d = []
for index, row in ratings.iterrows():
    safe_r.append(get_count(row, "Safe R"))
    likely_r.append(get_count(row, "Likely R"))
    # Forecasters use both "Leans R" and "Lean R"; count the two spellings together
    lean_r.append(get_count(row, "Leans R") + get_count(row, "Lean R"))
    tilt_r.append(get_count(row, "Tilt R"))
    toss.append(get_count(row, "Tossup"))
    safe_d.append(get_count(row, "Safe D"))
    likely_d.append(get_count(row, "Likely D"))
    # Mirror the R side in case any forecaster writes "Leans D"
    lean_d.append(get_count(row, "Leans D") + get_count(row, "Lean D"))
    tilt_d.append(get_count(row, "Tilt D"))
ratings["Safe R"] = safe_r
ratings["Likely R"] = likely_r
ratings["Lean R"] = lean_r
ratings["Tilt R"] = tilt_r
ratings["Tossup"] = toss
ratings["Safe D"] = safe_d
ratings["Likely D"] = likely_d
ratings["Lean D"] = lean_d
ratings["Tilt D"] = tilt_d
ratings.head()
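# Sanity check: each state's nine group counts should sum to at most 15,
# and to less than 15 whenever a forecaster's label matches none of the strings above
group_cols = ["Safe R", "Likely R", "Lean R", "Tilt R", "Tossup",
              "Tilt D", "Lean D", "Likely D", "Safe D"]
assert (ratings[group_cols].sum(axis=1) <= 15).all()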
# Add the consensus rating, the max number of forecasters in agreement,
# and the confidence of that consensus (agreement out of 15 forecasters)
prediction = []
m = []
for index, row in ratings.iterrows():
    p, em = get_consensus(row)
    prediction.append(p)
    m.append(em)
ratings["Consensus"] = prediction
ratings["Max"] = m
ratings["Confidence"] = ratings["Max"] / 15
ratings.head()
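# Quick look at the least settled states (lowest consensus confidence)
ratings[["State", "Consensus", "Max", "Confidence"]].sort_values("Confidence").head(10)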
# Export the cleaned data set used by the Tableau visual
combined = ratings
combined.to_csv('combined.csv', index=False)
combined.head()