import pandas as pd
import numpy as np


data = pd.read_csv("stage3.csv")
data.head()


columns_to_remove = [
    "source_url",
    "congressional_district",
    "location_description",
    "notes",
    "participant_name",
    "sources",
    "state_house_district",
    "state_senate_district",
]
data = data.drop(columns=columns_to_remove)
data.head()


labels = ["address", "incident_url", "incident_url_fields_missing"]
data = data.drop(columns=labels)
data.head()


data = data[data["date"].str.contains("2013") == False]
data.head()


data["date"] = pd.to_datetime(data["date"])
data.head()


data["year"] = data["date"].dt.year
data["month"] = data["date"].dt.month
data["day"] = data["date"].dt.day
data["month_year"] = data["date"].dt.to_period("M")
data.head()


import matplotlib.pyplot as plt
import seaborn as sns


# Collect the data
frequencies = {}
for _, row in data.iterrows():
    if row["n_killed"] not in frequencies:
        frequencies[row["n_killed"]] = 1
    else:
        frequencies[row["n_killed"]] += 1
for i in range(4):
    frequencies.pop(i, None)


# Plot the data
plt.bar(frequencies.keys(), frequencies.values(), width=0.7)
plt.xlim([0, 27])
plt.xlabel("Number of Fatalities")
plt.ylabel("Frequency")
plt.title("Distribution of Fatalities in Mass Shootings")
plt.show()


# Plotting normalized value counts of fatalities
k_freq = data["n_killed"].value_counts(normalize=True).iloc[0:6]
sns.barplot(x=k_freq.index, y=k_freq.values)
plt.xlabel("Number of fatalities")
plt.ylabel("Percentage")
plt.title("Distribution of Fatalities Normalized")
plt.show()


# Collecting the data
gun_types = {"Handgun": 0, "Rifle": 0, "Shotgun": 0}
gun_type_df = data.dropna(subset=["gun_type"])
for _, row in gun_type_df.iterrows():
    gun_types["Handgun"] += row["gun_type"].count("Handgun")
    gun_types["Rifle"] += row["gun_type"].count("Rifle")
    gun_types["Shotgun"] += row["gun_type"].count("Shotgun")


# Plotting the data
plt.bar(gun_types.keys(), gun_types.values())
plt.xlabel("Gun Type")
plt.ylabel("Frequency")
plt.title("Frequency of Different Gun Types Used in Shootings")
plt.show()


# Create catagories
male_vs_female = {"Child 0-11": [0, 0], "Teen 12-17": [0, 0], "Adult 18+": [0, 0]}
gender_age_df = data.dropna(subset=["participant_gender", "participant_age_group"])

# Collect data
for _, row in gender_age_df.iterrows():
    tokens_gender = row["participant_gender"].split("||")
    tokens_gender = [e[3:] for e in tokens_gender]
    tokens_age_grp = row["participant_age_group"].split("||")
    tokens_age_grp = [e[3:] for e in tokens_age_grp]
    result = list(zip(tokens_gender, tokens_age_grp))
    for pair in result:
        if pair[0] == "Male":
            if pair[1] == "Child 0-11":
                male_vs_female["Child 0-11"][0] += 1
            elif pair[1] == "Teen 12-17":
                male_vs_female["Teen 12-17"][0] += 1
            elif pair[1] == "Adult 18+":
                male_vs_female["Adult 18+"][0] += 1
        elif pair[0] == "Female":
            if pair[1] == "Child 0-11":
                male_vs_female["Child 0-11"][1] += 1
            elif pair[1] == "Teen 12-17":
                male_vs_female["Teen 12-17"][1] += 1
            elif pair[1] == "Adult 18+":
                male_vs_female["Adult 18+"][1] += 1


# Create labels
labels = ["Adult", "Teen", "Child"]
male_data = [
    male_vs_female["Adult 18+"][0],
    male_vs_female["Teen 12-17"][0],
    male_vs_female["Child 0-11"][0],
]
female_data = [
    male_vs_female["Adult 18+"][1],
    male_vs_female["Teen 12-17"][1],
    male_vs_female["Child 0-11"][1],
]

# Plot data
x_axis = np.arange(len(labels))
width = 0.35

fig, ax = plt.subplots()
fig.set_figwidth(10)
fig.set_figheight(8)
rects1 = ax.bar(x_axis - width / 2, male_data, width, label="Male")
rects2 = ax.bar(x_axis + width / 2, female_data, width, label="Female")

ax.set_xlabel("Age Group")
ax.set_ylabel("Amount of Involvement")
ax.set_ylim([0, 275000])
ax.set_title("Male Verses Female Involvement in Gun Violence")
ax.set_xticks(x_axis, labels)
ax.legend()
ax.bar_label(rects1, padding=3)
ax.bar_label(rects2, padding=3)
plt.show()


def mean_age_of_participants(row):
    ages = {k: 0 for k in range(15, 75)}
    for age in ages.keys():
        count = row.count(str(age))
        ages[age] += count
    lst = []
    for key, value in ages.items():
        if key * value != 0:
            lst.append(key * value)
    sum_of_ages, num_of_ages = float(sum(lst)), float(len(lst))
    if sum_of_ages == 0:
        return "Invalid"
    else:
        return sum_of_ages / num_of_ages


age_df = data.dropna(subset=["participant_age"])
raw, filtered = [], []
for _, row in age_df.iterrows():
    [mean_age, lethality] = mean_age_of_participants(row["participant_age"]), float(
        ((2 * row["n_killed"]) + (1.5 * row["n_injured"]))
    )
    raw.append([mean_age, lethality])
for entry in raw:
    if entry[0] != "Invalid":
        filtered.append(entry)


x_data, y_data = [], []
for entry in filtered:
    if entry[0] < 75 and entry[1] < 100:
        x_data.append(entry[0])
        y_data.append(entry[1])
[slope, intercept] = np.polyfit(x_data, y_data, 1)
plt.figure(figsize=(10, 8))
plt.scatter(x_data, y_data, s=30, edgecolor="black")
plt.xlabel("Mean Age of Participants")
plt.ylabel("Measure of Lethality")
plt.title(
    "Mean Age of Participants Between 15 and 75 Years Old In Shootings Verses Lethality"
)
plt.plot(np.asarray(x_data), slope * np.asarray(x_data) + intercept, color="orange")
plt.show()


# Getting GeoJson of US states from the folium and saving as geopandas(so we can add GeoJson tooltips)
# Source: https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json
import geopandas as gpd

state_geo = gpd.read_file("data/us-states.json")


# Summing up incidents per state
incident_count = data["state"].value_counts().reset_index()
incident_count.columns = ["name", "count"]
# Then merging since folium only does one data source for GeoJson
state_geo_count = state_geo.merge(incident_count, on="name")


from folium import Map, Choropleth
from folium.features import GeoJson, GeoJsonTooltip

# Creating map and choropleth
total_shootings_by_state_map = Map(location=[43, -102], zoom_start=4)

Choropleth(
    geo_data=state_geo,
    data=incident_count,
    bins=9,
    columns=["name", "count"],
    key_on="feature.properties.name",
    legend_name="Total shootings in state from 2014-2018",
    fill_color="YlOrRd",
    fill_opacity=0.7,
    line_opacity=0.5,
    reset=True,
).add_to(total_shootings_by_state_map)

<folium.features.Choropleth at 0x1a414bfd0>


# Styling functions and gjson tooltips
style = lambda x: {
    "fillColor": "#ffffff",
    "color": "#000000",
    "fillOpacity": 0.1,
    "weight": 0.1,
}

highlight = lambda x: {
    "fillColor": "#000000",
    "color": "#000000",
    "fillOpacity": 0.30,
    "weight": 0.1,
}

gjson = GeoJson(
    data=state_geo_count,
    style_function=style,
    highlight_function=highlight,
    control=False,
    tooltip=GeoJsonTooltip(
        fields=["name", "count"],
        aliases=["State", "Shootings"],
    ),
)
total_shootings_by_state_map.add_child(gjson)
total_shootings_by_state_map.keep_in_front(gjson)


# Showing the map
total_shootings_by_state_map


# Making heatmap dataframe and parsing it to fit constraints
heatmap_df = data.dropna(subset=["month_year", "latitude", "longitude"])
heat_data = (
    heatmap_df[["month_year", "latitude", "longitude"]]
    .groupby("month_year")
    .apply(lambda row: [list(tup) for tup in zip(row["latitude"], row["longitude"])])
    .tolist()
)


# Getting list of all time values sorted
time_index = list(heatmap_df["month_year"].astype("str").sort_values().unique())


from folium.plugins import HeatMapWithTime

# Making a heatmap object and inputting data
heatmap = Map(location=[43, -102], zoom_start=4)

HeatMapWithTime(
    heat_data,
    index=time_index,
    radius=10,
    auto_play=False,
    speed_step=1,
    min_speed=1,
).add_to(heatmap)

<folium.plugins.heat_map_withtime.HeatMapWithTime at 0x1adf256c0>


heatmap


# Getting total count of incidents for every month
cpm = data["month_year"].value_counts().sort_index().to_frame()
cpm.columns = ["count"]
cpm["year"] = cpm.index.year
cpm["month"] = cpm.index.month
# months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
cpm.head()


sns.set_theme()
election_df = cpm.query("year >= 2015 and year <= 2017")
sns.relplot(election_df, x="month", y="count", col="year", kind="line")

<seaborn.axisgrid.FacetGrid at 0x106c94fd0>


sns.lineplot(election_df, x="month", y="count", hue="year").set(
    title="Total shootings per month over 2015-2017"
)

[Text(0.5, 1.0, 'Total shootings per month over 2015-2017')]


sns.lineplot(cpm, x="month", y="count", hue="year").set(
    title="Total gun violence per month over 2014-2018"
)

[Text(0.5, 1.0, 'Total gun violence per month over 2014-2018')]


# Getting average incidents per month
mc = cpm.groupby("month")["count"].mean()
sns.lineplot(x=mc.index, y=mc.values)
plt.xlabel("Month")
plt.ylabel("Average volume")
plt.title("Average volume of gun violence per month")
plt.show()


yc = cpm.groupby("year")["count"].sum()
yc

year
2014    51854
2015    53579
2016    58763
2017    61401
2018    13802
Name: count, dtype: int64


yc = yc.drop(index=yc.index[-1])
yc

year
2014    51854
2015    53579
2016    58763
2017    61401
Name: count, dtype: int64


sns.barplot(x=yc.index, y=yc.values)
plt.xlabel("Year")
plt.ylabel("Total volume of gun violence")
plt.title("Total volume of gun violence per year")
plt.show()


import scipy.stats as stats
fvalue, pvalue = stats.f_oneway(male_vs_female["Adult 18+"], male_vs_female["Teen 12-17"], male_vs_female["Child 0-11"])
print(f"The test produced the following:\nThe F-Score was {fvalue} and the p-value was {pvalue}")

The test produced the following:
The F-Score was 1.6177923487325807 and the p-value was 0.33370760815857897


stats.ttest_ind(a=x_data, b=y_data, equal_var=True)

Ttest_indResult(statistic=945.8095355323632, pvalue=0.0)


cpd = data["date"].value_counts().sort_index().to_frame().copy()
cpd.index = pd.to_datetime(cpd.index)

cpd.columns = ["count"]
cpd["year"] = cpd.index.year
cpd["month"] = cpd.index.month
cpd["day"] = cpd.index.day
cpd.head()


# Fit training data with linear regression
from sklearn.linear_model import LinearRegression

train_df = cpd.query("year < 2018").copy()

X_train = train_df.iloc[:, 1:4].values
y_train = train_df.iloc[:, 0].values.reshape(-1, 1)
reg = LinearRegression()
reg.fit(X_train, y_train)

train_df["pred"] = train_df.apply(
    lambda row: float(reg.predict([[row["year"], row["month"], row["day"]]])), axis=1
)
train_df = train_df.drop(columns=["year", "day", "month"])
# Checking the score
reg.score(X_train, y_train)

0.18705909256762032


plt.xticks(rotation=45)
sns.lineplot(data=train_df)
plt.xlabel("Date")
plt.ylabel("Total volume")
plt.title("Total volume of gun violence per day with linear regression")
plt.show()


# Run predictions on 2018 data
test_df = cpd.query("year == 2018").copy()
X_test = test_df.iloc[:, 1:4].values
y_test = test_df.iloc[:, 0].values.reshape(-1, 1)
test_df["pred"] = test_df.apply(
    lambda row: float(reg.predict([[row["year"], row["month"], row["day"]]])), axis=1
)
test_df = test_df.drop(columns=["year", "day", "month"])
reg.score(X_test, y_test)

-0.8194541412845129


plt.xticks(rotation=45)
sns.lineplot(data=test_df)
plt.xlabel("Date")
plt.ylabel("Total volume")
plt.title("Total volume of gun violence per day with regression prediction")
plt.show()


# Polynomial regression training
from sklearn.preprocessing import PolynomialFeatures

train_df = cpd.query("year < 2018").copy()

X_train = train_df.iloc[:, 1:4].values
y_train = train_df.iloc[:, 0].values.reshape(-1, 1)
poly = PolynomialFeatures(5)
poly_X_train = poly.fit_transform(X_train)

clf = LinearRegression()
clf.fit(poly_X_train, y_train)

train_df["pred"] = train_df.apply(
    lambda row: float(
        clf.predict(poly.fit_transform([[row["year"], row["month"], row["day"]]]))
    ),
    axis=1,
)
train_df = train_df.drop(columns=["year", "day", "month"])
clf.score(poly_X_train, y_train)

0.11193099911558801


plt.xticks(rotation=45)
sns.lineplot(data=train_df)
plt.xlabel("Date")
plt.ylabel("Total volume")
plt.title("Total volume of gun violence per day with polynomial regression")
plt.show()


# Polynomial regression on 2018 data
test_df = cpd.query("year == 2018").copy()
X_test = test_df.iloc[:, 1:4].values
y_test = test_df.iloc[:, 0].values.reshape(-1, 1)

poly_X_test = poly.fit_transform(X_test)

test_df["pred"] = test_df.apply(
    lambda row: float(
        clf.predict(
            poly.fit_transform(
                [
                    [
                        row["year"],
                        row["month"],
                        row["day"],
                    ]
                ]
            )
        )
    ),
    axis=1,
)
test_df = test_df.drop(columns=["year", "day", "month"])
clf.score(poly_X_test, y_test)

-1.4608151418296078


plt.xticks(rotation=45)
sns.lineplot(data=test_df)
plt.xlabel("Date")
plt.ylabel("Total volume")
plt.title("Total volume of gun violence per day with polynomial regression prediction")
plt.show()


yc = yc.to_frame().reset_index()
yc


# Fit year count with linear regression
reg = LinearRegression()
X = yc["year"].values.reshape(-1, 1)
y = yc["count"].values.reshape(-1, 1)
reg.fit(X, y)

LinearRegression()

LinearRegression()


reg.score(X, y)

0.966034042758312


# Add predictions to dataframe
yc["pred"] = yc.apply(
    lambda row: float(reg.predict(row["year"].reshape(-1, 1))), axis=1
)
yc = yc.set_index("year")


sns.lineplot(data=yc)
plt.xlabel("Year")
plt.ylabel("Total volume")
plt.title("Total volume of gun violence per year with linear regression line")
plt.show()

	incident_id	date	state	city_or_county	address	n_killed	n_injured	incident_url	source_url	incident_url_fields_missing	...	participant_age	participant_age_group	participant_gender	participant_name	participant_relationship	participant_status	participant_type	sources	state_house_district	state_senate_district
0	461105	2013-01-01	Pennsylvania	Mckeesport	1506 Versailles Avenue and Coursin Street	0	4	http://www.gunviolencearchive.org/incident/461105	http://www.post-gazette.com/local/south/2013/0...	False	...	0::20	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male\|\|1::Male\|\|3::Male\|\|4::Female	0::Julian Sims	NaN	0::Arrested\|\|1::Injured\|\|2::Injured\|\|3::Injure...	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Victim\|\|4:...	http://pittsburgh.cbslocal.com/2013/01/01/4-pe...	NaN	NaN
1	460726	2013-01-01	California	Hawthorne	13500 block of Cerise Avenue	1	3	http://www.gunviolencearchive.org/incident/460726	http://www.dailybulletin.com/article/zz/201301...	False	...	0::20	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male	0::Bernard Gillis	NaN	0::Killed\|\|1::Injured\|\|2::Injured\|\|3::Injured	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Victim\|\|4:...	http://losangeles.cbslocal.com/2013/01/01/man-...	62.0	35.0
2	478855	2013-01-01	Ohio	Lorain	1776 East 28th Street	1	3	http://www.gunviolencearchive.org/incident/478855	http://chronicle.northcoastnow.com/2013/02/14/...	False	...	0::25\|\|1::31\|\|2::33\|\|3::34\|\|4::33	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male\|\|1::Male\|\|2::Male\|\|3::Male\|\|4::Male	0::Damien Bell\|\|1::Desmen Noble\|\|2::Herman Sea...	NaN	0::Injured, Unharmed, Arrested\|\|1::Unharmed, A...	0::Subject-Suspect\|\|1::Subject-Suspect\|\|2::Vic...	http://www.morningjournal.com/general-news/201...	56.0	13.0
3	478925	2013-01-05	Colorado	Aurora	16000 block of East Ithaca Place	4	0	http://www.gunviolencearchive.org/incident/478925	http://www.dailydemocrat.com/20130106/aurora-s...	False	...	0::29\|\|1::33\|\|2::56\|\|3::33	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Female\|\|1::Male\|\|2::Male\|\|3::Male	0::Stacie Philbrook\|\|1::Christopher Ratliffe\|\|...	NaN	0::Killed\|\|1::Killed\|\|2::Killed\|\|3::Killed	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Subject-Su...	http://denver.cbslocal.com/2013/01/06/officer-...	40.0	28.0
4	478959	2013-01-07	North Carolina	Greensboro	307 Mourning Dove Terrace	2	2	http://www.gunviolencearchive.org/incident/478959	http://www.journalnow.com/news/local/article_d...	False	...	0::18\|\|1::46\|\|2::14\|\|3::47	0::Adult 18+\|\|1::Adult 18+\|\|2::Teen 12-17\|\|3::...	0::Female\|\|1::Male\|\|2::Male\|\|3::Female	0::Danielle Imani Jameison\|\|1::Maurice Eugene ...	3::Family	0::Injured\|\|1::Injured\|\|2::Killed\|\|3::Killed	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Subject-Su...	http://myfox8.com/2013/01/08/update-mother-sho...	62.0	27.0

	incident_id	date	state	city_or_county	address	n_killed	n_injured	incident_url	incident_url_fields_missing	gun_stolen	...	incident_characteristics	latitude	longitude	n_guns_involved	participant_age	participant_age_group	participant_gender	participant_relationship	participant_status	participant_type
0	461105	2013-01-01	Pennsylvania	Mckeesport	1506 Versailles Avenue and Coursin Street	0	4	http://www.gunviolencearchive.org/incident/461105	False	NaN	...	Shot - Wounded/Injured\|\|Mass Shooting (4+ vict...	40.3467	-79.8559	NaN	0::20	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male\|\|1::Male\|\|3::Male\|\|4::Female	NaN	0::Arrested\|\|1::Injured\|\|2::Injured\|\|3::Injure...	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Victim\|\|4:...
1	460726	2013-01-01	California	Hawthorne	13500 block of Cerise Avenue	1	3	http://www.gunviolencearchive.org/incident/460726	False	NaN	...	Shot - Wounded/Injured\|\|Shot - Dead (murder, a...	33.9090	-118.3330	NaN	0::20	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male	NaN	0::Killed\|\|1::Injured\|\|2::Injured\|\|3::Injured	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Victim\|\|4:...
2	478855	2013-01-01	Ohio	Lorain	1776 East 28th Street	1	3	http://www.gunviolencearchive.org/incident/478855	False	0::Unknown\|\|1::Unknown	...	Shot - Wounded/Injured\|\|Shot - Dead (murder, a...	41.4455	-82.1377	2.0	0::25\|\|1::31\|\|2::33\|\|3::34\|\|4::33	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male\|\|1::Male\|\|2::Male\|\|3::Male\|\|4::Male	NaN	0::Injured, Unharmed, Arrested\|\|1::Unharmed, A...	0::Subject-Suspect\|\|1::Subject-Suspect\|\|2::Vic...
3	478925	2013-01-05	Colorado	Aurora	16000 block of East Ithaca Place	4	0	http://www.gunviolencearchive.org/incident/478925	False	NaN	...	Shot - Dead (murder, accidental, suicide)\|\|Off...	39.6518	-104.8020	NaN	0::29\|\|1::33\|\|2::56\|\|3::33	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Female\|\|1::Male\|\|2::Male\|\|3::Male	NaN	0::Killed\|\|1::Killed\|\|2::Killed\|\|3::Killed	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Subject-Su...
4	478959	2013-01-07	North Carolina	Greensboro	307 Mourning Dove Terrace	2	2	http://www.gunviolencearchive.org/incident/478959	False	0::Unknown\|\|1::Unknown	...	Shot - Wounded/Injured\|\|Shot - Dead (murder, a...	36.1140	-79.9569	2.0	0::18\|\|1::46\|\|2::14\|\|3::47	0::Adult 18+\|\|1::Adult 18+\|\|2::Teen 12-17\|\|3::...	0::Female\|\|1::Male\|\|2::Male\|\|3::Female	3::Family	0::Injured\|\|1::Injured\|\|2::Killed\|\|3::Killed	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Subject-Su...

	incident_id	date	state	city_or_county	n_killed	n_injured	gun_stolen	gun_type	incident_characteristics	latitude	longitude	n_guns_involved	participant_age	participant_age_group	participant_gender	participant_relationship	participant_status	participant_type
0	461105	2013-01-01	Pennsylvania	Mckeesport	0	4	NaN	NaN	Shot - Wounded/Injured\|\|Mass Shooting (4+ vict...	40.3467	-79.8559	NaN	0::20	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male\|\|1::Male\|\|3::Male\|\|4::Female	NaN	0::Arrested\|\|1::Injured\|\|2::Injured\|\|3::Injure...	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Victim\|\|4:...
1	460726	2013-01-01	California	Hawthorne	1	3	NaN	NaN	Shot - Wounded/Injured\|\|Shot - Dead (murder, a...	33.9090	-118.3330	NaN	0::20	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male	NaN	0::Killed\|\|1::Injured\|\|2::Injured\|\|3::Injured	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Victim\|\|4:...
2	478855	2013-01-01	Ohio	Lorain	1	3	0::Unknown\|\|1::Unknown	0::Unknown\|\|1::Unknown	Shot - Wounded/Injured\|\|Shot - Dead (murder, a...	41.4455	-82.1377	2.0	0::25\|\|1::31\|\|2::33\|\|3::34\|\|4::33	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Male\|\|1::Male\|\|2::Male\|\|3::Male\|\|4::Male	NaN	0::Injured, Unharmed, Arrested\|\|1::Unharmed, A...	0::Subject-Suspect\|\|1::Subject-Suspect\|\|2::Vic...
3	478925	2013-01-05	Colorado	Aurora	4	0	NaN	NaN	Shot - Dead (murder, accidental, suicide)\|\|Off...	39.6518	-104.8020	NaN	0::29\|\|1::33\|\|2::56\|\|3::33	0::Adult 18+\|\|1::Adult 18+\|\|2::Adult 18+\|\|3::A...	0::Female\|\|1::Male\|\|2::Male\|\|3::Male	NaN	0::Killed\|\|1::Killed\|\|2::Killed\|\|3::Killed	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Subject-Su...
4	478959	2013-01-07	North Carolina	Greensboro	2	2	0::Unknown\|\|1::Unknown	0::Handgun\|\|1::Handgun	Shot - Wounded/Injured\|\|Shot - Dead (murder, a...	36.1140	-79.9569	2.0	0::18\|\|1::46\|\|2::14\|\|3::47	0::Adult 18+\|\|1::Adult 18+\|\|2::Teen 12-17\|\|3::...	0::Female\|\|1::Male\|\|2::Male\|\|3::Female	3::Family	0::Injured\|\|1::Injured\|\|2::Killed\|\|3::Killed	0::Victim\|\|1::Victim\|\|2::Victim\|\|3::Subject-Su...

	incident_id	date	state	city_or_county	n_killed	n_injured	gun_stolen	gun_type	incident_characteristics	latitude	longitude	n_guns_involved	participant_age	participant_age_group	participant_gender	participant_relationship	participant_status	participant_type
278	95289	2014-01-01	Michigan	Muskegon	0	0	NaN	NaN	Shots Fired - No Injuries	43.2301	-86.2514	NaN	NaN	0::Adult 18+	0::Female	NaN	0::Unharmed	0::Victim
279	92401	2014-01-01	New Jersey	Newark	0	0	NaN	NaN	Officer Involved Incident	40.7417	-74.1695	NaN	NaN	NaN	NaN	NaN	NaN	NaN
280	92383	2014-01-01	New York	Queens	1	0	NaN	NaN	Shot - Dead (murder, accidental, suicide)	40.7034	-73.7474	NaN	0::22\|\|1::26	0::Adult 18+\|\|1::Adult 18+	0::Male\|\|1::Male	NaN	0::Killed\|\|1::Unharmed	0::Victim\|\|1::Subject-Suspect
281	92142	2014-01-01	New York	Brooklyn	0	1	NaN	NaN	Shot - Wounded/Injured	40.6715	-73.9476	NaN	0::34	0::Adult 18+\|\|1::Adult 18+	0::Male\|\|1::Male	NaN	0::Injured	0::Victim\|\|1::Subject-Suspect
282	95261	2014-01-01	Missouri	Springfield	0	1	NaN	NaN	Shot - Wounded/Injured	37.2646	-93.3007	NaN	0::6\|\|1::12	0::Child 0-11\|\|1::Teen 12-17	0::Female	NaN	0::Injured\|\|1::Unharmed	0::Victim\|\|1::Subject-Suspect

	incident_id	date	state	city_or_county	n_killed	n_injured	gun_stolen	gun_type	incident_characteristics	latitude	longitude	n_guns_involved	participant_age	participant_age_group	participant_gender	participant_relationship	participant_status	participant_type
278	95289	2014-01-01	Michigan	Muskegon	0	0	NaN	NaN	Shots Fired - No Injuries	43.2301	-86.2514	NaN	NaN	0::Adult 18+	0::Female	NaN	0::Unharmed	0::Victim
279	92401	2014-01-01	New Jersey	Newark	0	0	NaN	NaN	Officer Involved Incident	40.7417	-74.1695	NaN	NaN	NaN	NaN	NaN	NaN	NaN
280	92383	2014-01-01	New York	Queens	1	0	NaN	NaN	Shot - Dead (murder, accidental, suicide)	40.7034	-73.7474	NaN	0::22\|\|1::26	0::Adult 18+\|\|1::Adult 18+	0::Male\|\|1::Male	NaN	0::Killed\|\|1::Unharmed	0::Victim\|\|1::Subject-Suspect
281	92142	2014-01-01	New York	Brooklyn	0	1	NaN	NaN	Shot - Wounded/Injured	40.6715	-73.9476	NaN	0::34	0::Adult 18+\|\|1::Adult 18+	0::Male\|\|1::Male	NaN	0::Injured	0::Victim\|\|1::Subject-Suspect
282	95261	2014-01-01	Missouri	Springfield	0	1	NaN	NaN	Shot - Wounded/Injured	37.2646	-93.3007	NaN	0::6\|\|1::12	0::Child 0-11\|\|1::Teen 12-17	0::Female	NaN	0::Injured\|\|1::Unharmed	0::Victim\|\|1::Subject-Suspect

An In-Depth Analysis of Gun Violence in America¶

Will M, Ethan B, Zichao L¶

Part 1: Introduction¶

Part 2: Data¶

Part 3 - Analysis¶

Graphs¶

Distribution of Fatalities in Mass Shootings¶

Distribution of Fatalities Normalized¶

Frequency of Different Gun Types Used in Shootings¶

Male Verses Deaths Involvement in Gun Violence¶

Mean Age of Participants Between 15 and 75 Verses Lethality¶

Maps¶

MAP:¶

Overlaid:¶

Part 4: Hypothesis Testing and ML¶

Hypothesis Testing¶

ANOVA¶

T-Test¶

Machine Learning¶

Part 5: Conclusion¶

	count	year	month
2014-01	4395	2014	1
2014-02	3045	2014	2
2014-03	3669	2014	3
2014-04	3891	2014	4
2014-05	4320	2014	5

	count	year	month	day
2014-01-01	216	2014	1	1
2014-01-02	119	2014	1	2
2014-01-03	124	2014	1	3
2014-01-04	140	2014	1	4
2014-01-05	130	2014	1	5