World Population Trend Analysis
Download population data from data.gov
https://openei.org/doe-opendata/dataset/a7fea769-691d-4536-8ed3-471e993a2445/resource/86c50aa8-e40f-4859-b52e-29bb10166456/download/populationbycountry19802010millions.csv
In [40]:
# Load the population table from the CSV downloaded above.
# The file is looked up relative to the notebook's working directory.
import pandas as pd

POPULATION_CSV = "populationbycountry19802010millions.csv"
pop = pd.read_csv(POPULATION_CSV)
Cleanup¶
Replace missing values and '--' placeholders with 0, rename the location column, and convert data types.
In [41]:
# Clean the raw table in one pass:
#   * missing cells and the '--' placeholder both become 0,
#   * the unnamed first column is renamed to 'geo_location'.
pop = (
    pop.fillna(0)
       .replace('--', 0)
       .rename(columns={"Unnamed: 0": "geo_location"})
)

# Cast the 2008, 2009 and 2010 columns to float so they can be ranked and
# plotted numerically.
float_casts = {str(year): 'float' for year in range(2008, 2011)}
pop = pop.astype(float_casts)
Select top 10 geo locations based on 2010 population¶
In [42]:
# Keep only the 10 rows with the largest 2010 population and renumber them
# from 0 so the old row labels are discarded.
largest_by_2010 = pop.nlargest(n=10, columns='2010')
pop = largest_by_2010.reset_index(drop=True)

# Preview the first rows of the reduced table.
pop.head()
Out[42]:
Plotting¶
In [43]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
2010 population comparison¶
In [44]:
# Bar chart comparing the 2010 population of the selected geo locations.
#
# Current seaborn releases no longer expose matplotlib as `sns.plt`, so all
# pyplot calls go through the `plt` alias imported above.
sns.set_style('white')          # set the style before the axes are created
plt.figure(figsize=(20, 6))

g = sns.barplot(x='geo_location', y='2010', data=pop)

# Tick styling is applied after plotting so it targets the category labels
# that seaborn puts on the axes.
plt.xticks(weight='bold', size=15, rotation=-45, color='blue')
plt.yticks(weight='bold', size=15, color='blue')

# Title and axis labels (the x label replaces the default column name).
g.set_title('World Population 2010', size=40)
g.set_ylabel('Population in Millions', size=22, weight='bold')
g.set_xlabel('Geographic Region', size=22, weight='bold')
Out[44]:
Population Trend Using Point plot¶
In [87]:
# Reshape the wide table into long form: every column except 'geo_location'
# is unpivoted, with the column label stored in 'variable' and the cell
# content in 'value'.
year_columns = pop.columns.drop('geo_location')
popM = pop.melt(id_vars='geo_location', value_vars=year_columns)
popM['value'] = popM['value'].astype('float')

# Keep only the locations to be plotted. Selecting by label list returns the
# rows grouped in the order the labels are listed here.
selected_locations = ['World', 'Asia & Oceania', 'Africa', 'China', 'India', 'United States']
popM = popM.set_index('geo_location').loc[selected_locations].reset_index()
popM.head()
Out[87]:
In [88]:
# Point plot of population over time, one line per selected geo location.
#
# Current seaborn releases no longer expose matplotlib as `sns.plt`, so all
# pyplot calls go through the `plt` alias imported above.
sns.set_style('white')          # set the style before the axes are created
plt.figure(figsize=(20, 8))

g = sns.pointplot(x='variable', y='value', hue='geo_location', data=popM)

# Tick styling is applied after plotting so it targets the labels seaborn
# puts on the axes; x labels are rotated to keep them from overlapping.
plt.xticks(weight='bold', size=15, rotation=-90, color='blue')
plt.yticks(weight='bold', size=15, color='blue')

# Title and axis labels (the x label replaces the default 'variable').
g.set_title('Population Growth Trend (1980 - 2010)', size=30)
g.set_ylabel('Population in Millions', size=22, weight='bold')
g.set_xlabel('Year', size=22, weight='bold')

# Enlarge the legend entries and the legend title.
legend = g.get_legend()
plt.setp(legend.get_texts(), fontsize='15')
plt.setp(legend.get_title(), fontsize='18')
Out[88]:
Population Trend Using Box plot¶
In [89]:
# Box plot of the population values of each selected geo location.
#
# Current seaborn releases no longer expose matplotlib as `sns.plt`, so all
# pyplot calls go through the `plt` alias imported above.
sns.set_style('white')          # set the style before the axes are created
plt.figure(figsize=(20, 10))

g = sns.boxplot(x=popM['geo_location'], y=popM['value'])

# Tick styling is applied after plotting so it targets the category labels
# that seaborn puts on the axes.
plt.xticks(weight='bold', size=15, rotation=-90, color='blue')
plt.yticks(weight='bold', size=15, color='blue')

g.set_title('Population Growth Trend (1980 - 2010)', size=30)
g.set_ylabel('Population in Millions', size=22, weight='bold')
# The x axis holds geo locations, not years, so it is labelled accordingly.
g.set_xlabel('Geographic Region', size=22, weight='bold')
##
Out[89]:
No comments:
Post a Comment