import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Choose Your Dataset
# Read the cause-of-death records from the CSV file into a DataFrame.
csv_path = "Causes_of_death_united_states.csv"
df = pd.read_csv(csv_path)
# Preview the first five rows (shown when this is the last expression of a notebook cell).
df.head(n=5)
Pre-Processing
# Remove rows with missing values. dropna() returns a new frame rather than
# modifying `df` in place, so the result has to be assigned back.
df = df.dropna()
# Discard the '113 Cause Name' column. `columns=` is used because the
# positional axis argument (`drop(label, 1)`) was removed in pandas 2.0.
df = df.drop(columns='113 Cause Name')
# Rename the remaining columns; assumes exactly five columns are left, in
# this order -- TODO confirm against the CSV header.
df.columns = ['Year', "Cause Name", "State", "Deaths", "Death Rate"]
# Largest death counts first.
df = df.sort_values('Deaths', ascending=False)
Statistical Analysis
# Summary statistics, transposed so that each described column is one row.
df.describe().T
# Grid of pairwise plots for the columns of df.
sns.pairplot(df)
# Totals per cause. numeric_only=True restricts the sum to numeric columns;
# since pandas 2.0 the default would also "sum" (concatenate) text columns
# such as State.
sum_cause_of_death = df.groupby('Cause Name').sum(numeric_only=True)
sum_cause_of_death.sort_values('Deaths', ascending=False)  # descending
# Distribution of the Deaths values for each cause.
sns.boxplot(x=df["Deaths"], y=df["Cause Name"])
Exploratory Data Analysis
# Bar chart of Deaths per cause, with the causes on the vertical axis.
sns.barplot(data=df, x="Deaths", y="Cause Name")
!pip install chart_studio
import chart_studio.plotly as py
import plotly.graph_objs as go #go.figure
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Set up plotly's offline notebook mode so that iplot() can render figures
# inline; connected=True loads plotly.js from the online CDN.
init_notebook_mode(connected=True)
# Per-state totals: the values for the choropleth.
# numeric_only=True keeps text columns out of the sum (pandas >= 2.0 would
# otherwise concatenate their strings).
sum_state = df.groupby('State').sum(numeric_only=True)
# A sum of years carries no meaning. `columns=` replaces the positional axis
# argument, which was removed in pandas 2.0.
sum_state = sum_state.drop(columns='Year')
sum_state
sum_state['Deaths'].unique()

# The 50 states in alphabetical order -- the same order as the two-letter
# `locations` codes handed to the choropleth trace.
state_names = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California",
    "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
    "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland",
    "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri",
    "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey",
    "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
    "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina",
    "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming",
]

# Look each total up by state name instead of relying on row position:
#   * .unique() would silently drop a state whose total equals another's,
#     shortening and shifting the list;
#   * any extra group in the State column (e.g. a national aggregate or a
#     non-state territory) would shift every later value onto the wrong state.
# NOTE(review): assumes the State column holds full state names; a name that
# is absent yields NaN (left blank on the map) -- confirm against the CSV.
vals = sum_state['Deaths'].reindex(state_names)
z = vals.astype(float).tolist()
print(z)
# Two-letter postal codes of the 50 states, ordered alphabetically by state
# name. Entry i of `z` must describe entry i of this list.
state_codes = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
]

# Trace definition for the choropleth map.
data = dict(
    type='choropleth',  # key type
    locations=state_codes,
    locationmode='USA-states',  # lets plotly know the codes are US states
    colorscale='Reds',
    # Hover text; must be in the same index position as `locations`.
    text=list(state_codes),
    z=z,  # values that are mapped onto the color scale
    # Describes what `z` holds; replaces a leftover template placeholder.
    colorbar={'title': 'Total Deaths'},
)
data
# Restrict the map to the United States.
layout = dict(geo={'scope': 'usa'})
choromap = go.Figure(data=[data], layout=layout)
iplot(choromap)
Correlations
# Pairwise correlation of the per-state totals. Only numeric columns are
# used: DataFrame.corr() raises on text columns in pandas >= 2.0, and
# select_dtypes works the same way across pandas versions.
corr = sum_state.select_dtypes(include='number').corr()
corr
# Correlation matrix as a heatmap, with the coefficient written in each cell.
sns.heatmap(corr, annot=True)
# Scatter plot with a fitted regression line: Death Rate against Deaths.
sns.regplot(x=df["Deaths"], y=df['Death Rate'])