import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Choose Your Dataset
# Load the WHO suicide statistics from a CSV file in the working directory.
csv_path = "who_suicide_statistics.csv"
df = pd.read_csv(csv_path)
df.head(n=5)  # preview the first five rows
Pre-Processing
# Remove rows that contain missing values. dropna() returns a new frame
# rather than modifying df in place, so the result has to be assigned back.
df = df.dropna()
#df_sui=pd.DataFrame(df.groupby(['country','year'])['suicides_no'].sum().reset_index())
#df_sui
#count_max_sui=pd.DataFrame(df_sui.groupby('country')['suicides_no'].sum())
#count_max_sui
# Rename the columns by position; this requires the frame to have exactly six columns.
df.columns = ["country", "year", "sex", "age", "suicides", "population"]
# Order rows from the highest to the lowest suicide count.
df = df.sort_values('suicides', ascending=False)
Statistical Analysis
# Summary statistics, transposed so each described column becomes a row.
df.describe().transpose()
# Pairwise scatter plots and histograms of the frame's columns.
sns.pairplot(data=df)
# Box-and-whisker plot of suicide counts for each age group.
sns.boxplot(data=df, x="suicides", y="age")
Exploratory Data Analysis
# Bar plot of suicide counts for each age group.
sns.barplot(data=df, x="suicides", y="age")
!pip install chart_studio
import chart_studio.plotly as py
import plotly.graph_objs as go #go.figure
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Total suicides and population per country, summed over every year, sex and
# age group. Only these two measures are aggregated: summing 'year' is
# meaningless, and the original follow-up step replaced this aggregate with
# the raw per-row frame, so the map downstream never saw country totals.
suicides_country = (
    df.groupby('country', as_index=False)[['suicides', 'population']].sum()
)
suicides_country
# World choropleth: one colour per country, scaled by its 'suicides' value.
# Countries are matched to map regions by name (locationmode='country names').
count = [
    dict(
        type='choropleth',
        locations=suicides_country['country'],
        locationmode='country names',
        z=suicides_country['suicides'],
        text=suicides_country['country'],  # hover label for each country
        colorscale='Viridis',
        autocolorscale=False,
        reversescale=True,
        # Thin grey outline around every country.
        marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
        # The obsolete 'autotick' key was removed; it is not a colorbar
        # attribute in current Plotly and was only tolerated because
        # validation is switched off below.
        colorbar=dict(title='Suicide by Country'),
    )
]
layout = dict(
    title='World Suicide Map',
    geo=dict(
        showframe=True,
        showcoastlines=True,
        # Projection names are lowercase in Plotly; 'Mercator' is not a
        # recognised value.
        projection=dict(type='mercator'),
    ),
)
fig = dict(data=count, layout=layout)
iplot(fig, validate=False, filename='d3-world-map')
import plotly.express as px
# Preview the first five rows of the per-country table.
suicides_country.head(n=5)
# Distinct values found in the 'suicides' column.
suicides_country.loc[:, "suicides"].unique()
# NOTE: everything from here to the end of the second string literal is
# disabled code. It is a template for a USA-states choropleth: the commented
# loop would build the list `z` from a `sum_state` frame, and the two string
# literals hold the trace and figure definitions. Neither `z` nor `sum_state`
# is defined in the visible part of this file, so the template cannot run
# as written.
#vals = sum_state['Deaths'].unique()
#z = []
#for i in vals:
#i = float(i)
#z.append(i)
#print(z)
# Bare string literal (not executed as code): trace definition for the template.
'''data = dict(
type = 'choropleth', #key type
locations = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
"HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
"MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
"NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
"SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"],
locationmode = 'USA-states', #lets plotly know its USA
colorscale = 'Reds',
text = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
"HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
"MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
"NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
"SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"], #a list of what hovers over each of the locations
z = z, # equal to the values that are going to be shown to you in an actual color scale
colorbar = {'title':'Colorbar Title Here'}
)
#text must be in the same index location as locations'''
#data
# Bare string literal (not executed as code): layout and render step for the template.
"""layout = dict(geo = {'scope':'usa'})
choromap = go.Figure(data = [data],layout=layout)
iplot(choromap)"""
Correlations
# Pairwise correlation of the numeric columns only. Non-numeric columns are
# filtered out first because DataFrame.corr() raises on them in pandas >= 2.0
# (older versions dropped them silently, so the result is unchanged there).
corr = df.select_dtypes(include='number').corr()
corr
# Annotated heatmap of the correlation matrix.
sns.heatmap(corr, annot=True)
# Scatter plot with a fitted regression line: suicides against population.
sns.regplot(x=df["suicides"], y=df['population'])