import pandas as pd
import calendar
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
data = pd.read_csv("https://missingmigrants.iom.int/sites/g/files/tmzbdl601/files/2022-02/Missing_Migrants_Global_Figures.csv")
cols = data.columns
new_column_names = []
for col in cols:
new_col = col.lstrip().rstrip().lower().replace (" ", "_")
new_column_names.append(new_col)
data.columns = new_column_names
data.info()
data["coordinates"] = data["coordinates"].str.replace("POINT","")
data["coordinates"] = data["coordinates"].str.replace("(","")
data["coordinates"] = data["coordinates"].str.replace(")","")
data[['lon', 'lat']] = data['coordinates'].str.split(expand=True)
data.info()
data["death_cause_clean"]=""
for index,row in data.iterrows():
cause = (row["cause_of_death"])
if "Violence" in cause:
data["death_cause_clean"][index] = "Violence"
elif "Harsh environmental conditions / lack of adequate shelter, food, water" in cause:
data["death_cause_clean"][index] = "Harsh environmental conditions"
elif "Drowning" in cause:
data["death_cause_clean"][index] = "Drowning"
elif "Vehicle accident / death linked to hazardous transport" in cause:
data["death_cause_clean"][index] = "Vehicle accident"
elif "Sickness / lack of access to adequate healthcare" in cause:
data["death_cause_clean"][index] = "Inadequate healthcare access"
elif "Mixed or unknown" in cause:
data["death_cause_clean"][index] = "Mixed or unknown"
elif "Accidental death" in cause:
data["death_cause_clean"][index] = "Accidental death"
else:
print(cause)
data.info()
region_and_cause = data.groupby(['region_of_incident','death_cause_clean'])['total_number_of_dead_and_missing'].sum().reset_index()
region_and_cause.rename(columns={'region_of_incident': 'source', 'death_cause_clean': 'target', 'total_number_of_dead_and_missing':'value'}, inplace=True)
region_and_cause
month_and_cause = data.groupby(['incident_date','death_cause_clean'])['total_number_of_dead_and_missing'].sum().reset_index()
month_and_cause['incident_date'] = pd.to_datetime(month_and_cause['incident_date'])
month_and_cause['month_year'] = month_and_cause['incident_date'].dt.to_period('M')
month_and_cause.loc[:, month_and_cause.columns!='incident_date']
month_and_cause = month_and_cause.groupby(['month_year','death_cause_clean'])['total_number_of_dead_and_missing'].sum().reset_index()
month_and_cause = month_and_cause.pivot(index='month_year', columns='death_cause_clean', values='total_number_of_dead_and_missing').reset_index()
cols = ['Accidental death', 'Drowning', 'Harsh environmental conditions','Mixed or unknown','Inadequate healthcare access','Vehicle accident','Violence']
month_and_cause[cols] = month_and_cause[cols].fillna(0).astype(int)
month_and_cause
year_and_route = data.groupby(['incident_year','migration_route'])['total_number_of_dead_and_missing'].sum().reset_index()
year_and_route = year_and_route[(year_and_route.migration_route=='US-Mexico border crossing')|
(year_and_route.migration_route=="Afghanistan to Iran")|
(year_and_route.migration_route=="Central Mediterranean")|
(year_and_route.migration_route=="Eastern Mediterranean")|
(year_and_route.migration_route=="Sahara Desert crossing")|
(year_and_route.migration_route=="Western Mediterranean")
]
year_and_route = year_and_route[(year_and_route.incident_year!=2022)]
year_and_route = year_and_route.sort_values(["incident_year","total_number_of_dead_and_missing"],ascending=[True, False])
year_and_route.info()
mexico_to_us = data[data.migration_route == "US-Mexico border crossing"]
regions = ['Europe','Mediterranean','Northern Africa']
europe = data[data['region_of_incident'].isin(regions)]
drowning_cases = data.groupby(['death_cause_clean'])['total_number_of_dead_and_missing'].sum().reset_index()
drowning_cases
drowning_cases["percent_cases"] = ((drowning_cases["total_number_of_dead_and_missing"])/drowning_cases['total_number_of_dead_and_missing'].sum())*100
data.to_csv("../data/data.csv",index=False)
region_and_cause.to_csv("../data/region_and_cause.csv", index = False)
month_and_cause.to_csv("../data/month_and_cause.csv",index=False)
year_and_route.to_csv("../data/year_and_route.csv",index=False)
mexico_to_us.to_csv("../data/mexico_to_us.csv",index=False)
europe.to_csv("../data/europe.csv",index=False)
-30-