import altair as alt
import pandas as pd
# import gdown
import geopandas as gpd
import math
# Load violence reduction data
# (Google Drive-hosted CSV of violence-reduction victim records.)
violence_df = pd.read_csv('https://drive.google.com/uc?id=1IVyt5fFTInsmeTOV9T6RILB5XLM2Gc5R')
# Preview the first five rows (renders only in a notebook).
violence_df.head()
# List the available columns.
print(violence_df.columns)
Index(['CASE_NUMBER', 'DATE', 'BLOCK', 'VICTIMIZATION_PRIMARY',
'INCIDENT_PRIMARY', 'GUNSHOT_INJURY_I', 'UNIQUE_ID', 'ZIP_CODE', 'WARD',
'COMMUNITY_AREA', 'STREET_OUTREACH_ORGANIZATION', 'AREA', 'DISTRICT',
'BEAT', 'AGE', 'SEX', 'RACE', 'VICTIMIZATION_FBI_CD', 'INCIDENT_FBI_CD',
'VICTIMIZATION_FBI_DESCR', 'INCIDENT_FBI_DESCR',
'VICTIMIZATION_IUCR_CD', 'INCIDENT_IUCR_CD',
'VICTIMIZATION_IUCR_SECONDARY', 'INCIDENT_IUCR_SECONDARY',
'HOMICIDE_VICTIM_FIRST_NAME', 'HOMICIDE_VICTIM_MI',
'HOMICIDE_VICTIM_LAST_NAME', 'MONTH', 'DAY_OF_WEEK', 'HOUR',
'LOCATION_DESCRIPTION', 'STATE_HOUSE_DISTRICT', 'STATE_SENATE_DISTRICT',
'UPDATED', 'LATITUDE', 'LONGITUDE', 'LOCATION'],
dtype='object')
# Inspect the AREA column (notebook-style display; float-typed per the output below).
violence_df['AREA']
0 3.0
1 2.0
2 2.0
3 1.0
4 2.0
...
57343 1.0
57344 1.0
57345 1.0
57346 1.0
57347 1.0
Name: AREA, Length: 57348, dtype: float64
# Load vacancy and abandoned building data
vacancy_df = pd.read_csv('https://drive.google.com/uc?id=1ysYE_S9b3pEuTa2Oj6srR1M9BNLL70BQ')
# Preview the first five rows (renders only in a notebook).
vacancy_df.head()
# List the available columns.
print(vacancy_df.columns)
## NOTE: we don't have to use vacancy data, as long as it's about crime in the city.
Index(['Docket Number', 'Violation Number', 'Issued Date',
'Issuing Department', 'Last Hearing Date', 'Property Address',
'Violation Type', 'Entity or Person(s)', 'Disposition Description',
'Total Fines', 'Total Administrative Costs', 'Interest Amount',
'Collection Costs or Attorney Fees', 'Court Cost',
'Original Total Amount Due', 'Total Paid', 'Current Amount Due',
'Latitude', 'Longitude', 'Location'],
dtype='object')
# Inspect the Location column (WKT-style "POINT (lon lat)" strings per the output below).
vacancy_df['Location']
0 POINT (-87.62766855145455 41.72433163710899)
1 POINT (-87.70884424604687 41.919468897981616)
2 POINT (-87.63393730049602 41.65966224784699)
3 POINT (-87.65249718861547 41.79047358708863)
4 POINT (-87.65493156629385 41.80222309496589)
...
4987 POINT (-87.68512937131078 41.76819724741188)
4988 POINT (-87.6498292763393 41.78117319861422)
4989 POINT (-87.68070798081595 41.77008677281995)
4990 POINT (-87.6850709395897 41.791241248062725)
4991 POINT (-87.68143791102256 41.80309671151574)
Name: Location, Length: 4992, dtype: object
# Inspect the COMMUNITY_AREA column (community-area names as strings).
violence_df['COMMUNITY_AREA']
0 EDGEWATER
1 MORGAN PARK
2 WASHINGTON HEIGHTS
3 MCKINLEY PARK
4 CHATHAM
...
57343 NEW CITY
57344 WASHINGTON PARK
57345 WASHINGTON PARK
57346 GREATER GRAND CROSSING
57347 NEW CITY
Name: COMMUNITY_AREA, Length: 57348, dtype: object
## Sample 1,000 rows — the full dataset is too big to plot; the fixed
## random_state makes the draw reproducible across runs.
sampled_violence_data = violence_df[['AGE', 'COMMUNITY_AREA']].sample(n=1000, random_state=20)
# Overall count of each age category in the sample.
age_counts = (
    sampled_violence_data['AGE']
    .value_counts()
    .reset_index()
    .set_axis(['Age Category', 'Count'], axis=1)
)
# Counts broken out by (community area, age category).
age_counts_by_area = (
    sampled_violence_data
    .groupby(['COMMUNITY_AREA', 'AGE'])
    .size()
    .reset_index(name='Count')
    .rename(columns={'AGE': 'Age Category'})
)
# Total violent-incident count per community area.
total_counts_by_area = sampled_violence_data.groupby('COMMUNITY_AREA').size().reset_index(name='TotalCount')
# The ten community areas with the highest totals.
top_10_areas = total_counts_by_area.sort_values(by='TotalCount', ascending=False).head(10)['COMMUNITY_AREA']
# Keep only those areas, then attach each area's total (used to sort the chart).
in_top_10 = age_counts_by_area['COMMUNITY_AREA'].isin(top_10_areas)
filtered_age_counts_by_area = age_counts_by_area[in_top_10].merge(total_counts_by_area, on='COMMUNITY_AREA')
print(filtered_age_counts_by_area)
# Bar chart: top-10 community areas' violent-crime counts, stacked by age group.
ViolentAgeCount = alt.Chart(filtered_age_counts_by_area).mark_bar(size=30).encode(
# Bars sorted by each area's total count, descending.
y=alt.Y('COMMUNITY_AREA:N',
title='Community Area',
sort=alt.EncodingSortField(
field="TotalCount",
op="sum",
order="descending"
),
),
x=alt.X('Count:Q', title='Number of Violent Offenses'),
color=alt.Color('Age Category:O', title='Age Group'),
# Stack segments within each bar in ascending age order.
order=alt.Order('Age Category:O', sort='ascending'),
).properties(
height=400,
title = 'Top 10 Community Areas: Violent Crime Counts by Age Group'
)
# Render the chart (notebook display).
ViolentAgeCount
COMMUNITY_AREA Age Category Count TotalCount 0 AUBURN GRESHAM 0-19 9 40 1 AUBURN GRESHAM 20-29 17 40 2 AUBURN GRESHAM 30-39 8 40 3 AUBURN GRESHAM 40-49 2 40 4 AUBURN GRESHAM 50-59 3 40 .. ... ... ... ... 56 WEST GARFIELD PARK 0-19 4 41 57 WEST GARFIELD PARK 20-29 14 41 58 WEST GARFIELD PARK 30-39 15 41 59 WEST GARFIELD PARK 40-49 4 41 60 WEST GARFIELD PARK 50-59 4 41 [61 rows x 4 columns]
# Re-load both datasets fresh for the yearly time-series analysis below.
violence_df = pd.read_csv('https://drive.google.com/uc?id=1IVyt5fFTInsmeTOV9T6RILB5XLM2Gc5R')
vacancy_df = pd.read_csv("https://drive.google.com/uc?id=1ysYE_S9b3pEuTa2Oj6srR1M9BNLL70BQ")
# Convert 'Issued Date' to datetime format
vacancy_df['Issued Date'] = pd.to_datetime(vacancy_df['Issued Date'])
# Extract the year and create a new column 'Year'
vacancy_df['Year'] = vacancy_df['Issued Date'].dt.year
# Confirm the column sets (vacancy_df now also carries the derived 'Year').
print(violence_df.columns)
print(vacancy_df.columns)
Index(['CASE_NUMBER', 'DATE', 'BLOCK', 'VICTIMIZATION_PRIMARY',
'INCIDENT_PRIMARY', 'GUNSHOT_INJURY_I', 'UNIQUE_ID', 'ZIP_CODE', 'WARD',
'COMMUNITY_AREA', 'STREET_OUTREACH_ORGANIZATION', 'AREA', 'DISTRICT',
'BEAT', 'AGE', 'SEX', 'RACE', 'VICTIMIZATION_FBI_CD', 'INCIDENT_FBI_CD',
'VICTIMIZATION_FBI_DESCR', 'INCIDENT_FBI_DESCR',
'VICTIMIZATION_IUCR_CD', 'INCIDENT_IUCR_CD',
'VICTIMIZATION_IUCR_SECONDARY', 'INCIDENT_IUCR_SECONDARY',
'HOMICIDE_VICTIM_FIRST_NAME', 'HOMICIDE_VICTIM_MI',
'HOMICIDE_VICTIM_LAST_NAME', 'MONTH', 'DAY_OF_WEEK', 'HOUR',
'LOCATION_DESCRIPTION', 'STATE_HOUSE_DISTRICT', 'STATE_SENATE_DISTRICT',
'UPDATED', 'LATITUDE', 'LONGITUDE', 'LOCATION'],
dtype='object')
Index(['Docket Number', 'Violation Number', 'Issued Date',
'Issuing Department', 'Last Hearing Date', 'Property Address',
'Violation Type', 'Entity or Person(s)', 'Disposition Description',
'Total Fines', 'Total Administrative Costs', 'Interest Amount',
'Collection Costs or Attorney Fees', 'Court Cost',
'Original Total Amount Due', 'Total Paid', 'Current Amount Due',
'Latitude', 'Longitude', 'Location', 'Year'],
dtype='object')
# Parse the date column of each dataset, then derive a 'Year' column from it.
for frame, date_col in ((vacancy_df, 'Issued Date'), (violence_df, 'DATE')):
    frame[date_col] = pd.to_datetime(frame[date_col])
    frame['Year'] = frame[date_col].dt.year
# Annual totals for each dataset.
crime_by_year = violence_df.groupby('Year').size().reset_index(name='Number of Crimes')
vacancy_by_year = vacancy_df.groupby('Year').size().reset_index(name='Number of Vacancy Citations')
# Outer merge keeps years present in either dataset.
merged_data = pd.merge(crime_by_year, vacancy_by_year, on="Year", how="outer")
# Restrict to the 2018-2022 window (inclusive on both ends).
filtered_data = merged_data[merged_data['Year'].between(2018, 2022)]
filtered_data
# Largest value across both series, for a consistent y-axis range.
max_value = max(filtered_data['Number of Crimes'].max(), filtered_data['Number of Vacancy Citations'].max())
# Percentage change calculations for the number of crimes.
# Counts are hard-coded from the 2018-2022 filtered_data table
# (2018: 2972, 2019: 2689, 2020: 4165, 2021: 4473, 2022: 3575).
def _abs_pct_change(old, new):
    """Return the absolute percent change from `old` to `new`."""
    return abs(((old - new) / old) * 100)

# 2018 - 2019
percentageCrime1819 = _abs_pct_change(2972, 2689)
print(percentageCrime1819)
# 2019 - 2020
percentageCrime1920 = _abs_pct_change(2689, 4165)
print(percentageCrime1920)
# 2020 - 2021
percentageCrime2021 = _abs_pct_change(4165, 4473)
print(percentageCrime2021)
# 2021 - 2022
percentageCrime2122 = _abs_pct_change(4473, 3575)
print(percentageCrime2122)
# Show the 2018-2022 yearly totals again for reference (notebook display).
filtered_data
9.522207267833108 54.89029378951283 7.394957983193278 20.076011625307398
| Year | Number of Crimes | Number of Vacancy Citations | |
|---|---|---|---|
| 27 | 2018 | 2972 | 194.0 |
| 28 | 2019 | 2689 | 129.0 |
| 29 | 2020 | 4165 | 63.0 |
| 30 | 2021 | 4473 | 41.0 |
| 31 | 2022 | 3575 | 53.0 |
# Percentage change calculations for vacancy citations.
# Counts are hard-coded from the 2018-2022 filtered_data table
# (2018: 194, 2019: 129, 2020: 63, 2021: 41, 2022: 53).
def _abs_pct_change(old, new):
    """Return the absolute percent change from `old` to `new`."""
    return abs(((old - new) / old) * 100)

# 2018 - 2019
perctVac1819 = _abs_pct_change(194.0, 129.0)
print(perctVac1819)
# 2019 - 2020
perctVac1920 = _abs_pct_change(129.0, 63.0)
print(perctVac1920)
# 2020 - 2021
perctVac2021 = _abs_pct_change(63.0, 41.0)
print(perctVac2021)
# 2021 - 2022
perctVac2122 = _abs_pct_change(41.0, 53.0)
print(perctVac2122)
33.50515463917525 51.162790697674424 34.92063492063492 29.268292682926827
# New dataframe for the time-series charts: one row per consecutive-year
# pair, holding the absolute percent changes computed above.
timeSeriesFrame = {'YearRange': ['2018-2019', '2019-2020', '2020-2021', '2021-2022'],
'Percent Change Crimes': [percentageCrime1819, percentageCrime1920, percentageCrime2021, percentageCrime2122],
'Percent Change Vacancy Citations': [perctVac1819, perctVac1920, perctVac2021, perctVac2122]}
# Create DataFrame
dfSeries = pd.DataFrame(timeSeriesFrame)
# Display the DataFrame
print(dfSeries)
YearRange Percent Change Crimes Percent Change Vacancy Citations 0 2018-2019 9.522207 33.505155 1 2019-2020 54.890294 51.162791 2 2020-2021 7.394958 34.920635 3 2021-2022 20.076012 29.268293
# TODO: put a brush in by year
# Line chart of percent change in violent crimes per year range (steel blue).
crime_change = alt.Chart(dfSeries).mark_line().encode(
x=alt.X('YearRange:O', title='Year Range'),
y=alt.Y('Percent Change Crimes:Q', title='Percent Change', scale = alt.Scale(zero = False)),
color=alt.value('steelblue'),
# tooltip = ['Percent Change Crimes:Q']
).properties(
width = 300,
height = 300
)
# Companion line for percent change in vacancy citations (purple).
vacancy_change = alt.Chart(dfSeries).mark_line().encode(
x=alt.X('YearRange:O', title='Year Range'),
y=alt.Y('Percent Change Vacancy Citations:Q', title='Percent Change', scale = alt.Scale(zero = False)),
color = alt.value('purple')
# color=alt.Color('Percent Change Crimes:Q', scale=alt.Scale(scheme='viridis'), title='Percent Change Crimes')
).properties(
width = 300,
height = 300
)
# Layer the two lines on shared axes, then add a title and horizontal x labels.
final_chart = crime_change + vacancy_change
final_chart2 = alt.hconcat(final_chart).properties(
title=" % Change Violent Crimes & Vacancy Violations"
).configure_axisX(
labelAngle=0
)
# Render (notebook display).
final_chart2
# GeoJSON sources, hosted as GitHub gists; each is read as a feature collection.
# Chicago community-area boundaries.
url_chi = "https://gist.githubusercontent.com/naomiwag99/176d44066f9cd0b6df0536a46c811f9b/raw/c0f530e99b580e906e322cb436eac6dac0544172/ChicagoCommunityAreas.geojson"
chi = alt.Data(url=url_chi, format=alt.DataFormat(property='features', type='json'))
# 2021 vacant-building records.
url_vacant21 = "https://gist.githubusercontent.com/mjk7817/4ebc7d4e625aeb5edf463b9cd5b4baaf/raw/349f32e0a0652d87be0bd55ec0fa8dc72dead8ed/vacant21.geojson"
vacant21 = alt.Data(url=url_vacant21, format=alt.DataFormat(property='features', type='json'))
# 2021 violent-crime incidents.
url_violent21 = "https://gist.githubusercontent.com/mjk7817/f64ff6983dff41b9449fb06fe045e8c3/raw/d552c59e8df2346198c0fa1b24e2eafe3b46fecd/violent21.geojson"
violent21 = alt.Data(url=url_violent21, format=alt.DataFormat(property='features', type='json'))
# Base map: community-area polygons in light gray with white borders.
chi_land = alt.Chart(chi).mark_geoshape(
color='lightgray',
stroke='white',
# fill = mapfill2
).encode(
tooltip = alt.Tooltip(['properties.community:N'], title = "Community Area: ")
).project(type="identity", reflectY=True).properties(
width=600,
height=600
)
# Vacant-building shapes drawn in purple, layered over the base map.
vacant21_land = alt.Chart(vacant21).mark_geoshape(
color='purple',
size=100
).encode(
tooltip = ['properties.entity_or_person_s_:N', 'properties.issued_date:N']
).project(type="identity", reflectY=True).properties(
width=600,
height=600
).properties(
title = "2021 Violent Crimes & Vacant Buildings in Chicago"
)
# Custom categorical palette for the crime-type color scale.
myColor = ['#E377C2', '#1f78b4', '#33a02c', '#e31a1c', '#ff7f00', '#6a3d9a', '#b15928'] #used ChatGPT to generate hex values here
# Dropdown filter over incident type; the None option shows every crime type.
dropdown = alt.binding_select(options=[None, 'BATTERY', 'HOMICIDE', 'ROBBERY'], name='Type of Crime: ')
selection = alt.selection_point(fields=['properties.incident_primary'], bind=dropdown)
#used ChatGPT to learn how to use the range arguement for my custom scale
# Selected points keep their category color; everything else turns light gray.
map_fill = alt.condition(selection, alt.Color('properties.incident_primary:N', scale=alt.Scale(range = myColor), title='Type of Crime'), alt.value('lightgray'))
# BUG FIX: this previously referenced an undefined name `crime_selection`,
# which raised a NameError; the dropdown-bound selection is `selection`.
opacity_rule = alt.condition(
    selection,  # checks whether the point is in the current selection
    alt.value(0.8),
    alt.value(0.1),
)
# Violent-crime incidents as circles; fill and opacity react to the dropdown selection.
violent21_land = alt.Chart(violent21).mark_circle().encode(
latitude='properties.latitude:Q',
longitude='properties.longitude:Q',
color=alt.Color('properties.incident_primary:N', scale=alt.Scale(range= myColor), title='Type of Crime'),
tooltip=['properties.incident_primary:N', 'properties.location_description:N', 'properties.date:N'],
fill = map_fill,
opacity = opacity_rule
).project(
type="identity", reflectY=True
).add_params(
selection
)
# Layer base map, crime points, and vacant buildings into the final map.
final_map = chi_land + violent21_land + vacant21_land
# Render (notebook display).
final_map