Friday, June 15, 2018

World Population Trend

World Population Trend Analysis

Download population data from data.gov

World_Population_Trend_Analysis
In [40]:
## Load the population table from the CSV file into a DataFrame.
## The file is expected to sit in the notebook's working directory.
import pandas as pd

pop = pd.read_csv(filepath_or_buffer="populationbycountry19802010millions.csv")

Cleanup

Fill missing values, rename the location column, and convert data types
In [41]:
## Clean the raw table:
##   * missing values and the '--' placeholder both become 0
##   * the unnamed first column is renamed to 'geo_location'
pop = (
    pop.fillna(0)
       .replace('--', 0)
       .rename(columns={"Unnamed: 0": "geo_location"})
)
## Cast every year column to float, not just 2008-2010: a column that held
## the '--' placeholder keeps a non-numeric dtype after replace() and would
## otherwise break numeric operations on it.
year_cols = pop.columns.drop('geo_location')
pop[year_cols] = pop[year_cols].astype('float')

Select top 10 geo locations based on 2010 population

In [42]:
# Keep the ten rows with the largest values in the '2010' column,
# then renumber them from 0 so the old index is discarded.
pop = pop.nlargest(n=10, columns='2010')
pop = pop.reset_index(drop=True)
pop.head()
Out[42]:
geo_location 1980 1981 1982 1983 1984 1985 1986 1987 1988 ... 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
0 World 4451.32679 4534.01064 4613.94102 4694.9362 4775.05313 4856.52116 4940.1947 5026.35348 5113.1907 ... 6165.32364 6241.71768 6317.4264 6393.12094 6469.12974 6545.88444 6623.57196 6700.76588 6776.91747 6853.01941
1 Asia & Oceania 2469.81743 2518.13689 2563.52187 2610.10261 2655.88059 2702.75408 2751.06581 2802.31491 2853.88173 ... 3450.06017 3490.97008 3531.28903 3570.71937 3609.62956 3648.16942 3686.3589 3724.32335 3762.16099 3799.67028
2 China 984.73646 997.00072 1012.49049 1028.35653 1042.75605 1058.00772 1074.52256 1093.72571 1112.86641 ... 1270.74423 1277.59472 1284.30332 1291.0018 1297.76532 1304.26188 1310.58354 1317.06568 1323.59158 1330.14129
3 India 684.8877 699.15377 713.71094 728.52155 744.01697 759.61221 775.06324 790.64043 806.3794 ... 1023.29508 1040.28482 1057.25112 1074.15902 1090.97303 1107.62435 1124.1348 1140.56621 1156.89777 1173.10802
4 Africa 478.96479 493.63386 508.69882 523.66026 538.6777 553.7385 569.05927 584.06285 599.18529 ... 823.10298 843.06171 863.1923 883.59024 904.41715 925.68403 947.55885 969.97221 992.58225 1015.47842
5 rows × 32 columns

Plotting

In [43]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

2010 population comparison

In [44]:
# Bar chart: 2010 population for each geo location in `pop`.
plt.figure(figsize=(20, 6))
# set style of plots
sns.set_style('white')
g = sns.barplot(x='geo_location', y='2010', data=pop)
# Style the tick labels through matplotlib.pyplot directly; the `sns.plt`
# alias used previously is not exposed by newer seaborn releases and
# raises AttributeError there.
plt.xticks(weight='bold', size=15, rotation=-45, color='blue')
plt.yticks(weight='bold', size=15, color='blue')
# Title and axis labels (the x label replaces the raw column name).
g.set_title('World Population 2010', size=40)
g.set_ylabel('Population in Millions', size=22, weight='bold')
g.set_xlabel('Geographic Region', size=22, weight='bold')
Out[44]:
<matplotlib.text.Text at 0x9d31e10>

Population Trend Using Point plot

In [87]:
# Reshape `pop` from wide (one column per year) to long form with the
# columns geo_location / variable (year label) / value (population).
year_columns = pop.columns.drop('geo_location')
selected_locations = ['World','Asia & Oceania','Africa','China','India','United States']
popM = (
    pd.melt(pop, id_vars='geo_location', value_vars=year_columns)
      .astype({'value': 'float'})       # make sure the melted values are numeric
      .set_index('geo_location')
      .loc[selected_locations]          # keep only these locations, in this order
      .reset_index()
)
popM.head()
Out[87]:
geo_location variable value
0 World 1980 4451.32679
1 World 1981 4534.01064
2 World 1982 4613.94102
3 World 1983 4694.93620
4 World 1984 4775.05313
In [88]:
# Point plot: population per year, one coloured series per geo location.
plt.figure(figsize=(20, 8))
# set style of plots
sns.set_style('white')
g = sns.pointplot(x='variable', y='value', hue='geo_location', data=popM)
# Style the tick labels through matplotlib.pyplot directly; the `sns.plt`
# alias used previously is not exposed by newer seaborn releases and
# raises AttributeError there.
plt.xticks(weight='bold', size=15, rotation=-90, color='blue')
plt.yticks(weight='bold', size=15, color='blue')
# Title and axis labels ('variable' holds the year labels from the melt).
g.set_title('Population Growth Trend (1980 - 2010)', size=30)
g.set_ylabel('Population in Millions', size=22, weight='bold')
g.set_xlabel('Year', size=22, weight='bold')
# Enlarge the legend entries and the legend title.
plt.setp(g.get_legend().get_texts(), fontsize=15)  # for legend text
plt.setp(g.get_legend().get_title(), fontsize=18)
Out[88]:
[None, None]

Population Trend Using Box plot

In [89]:
# Box plot: spread of the yearly population values for each geo location.
plt.figure(figsize=(20, 10))
# set style of plots
sns.set_style('white')
g = sns.boxplot(x='geo_location', y='value', data=popM)
# Style the tick labels through matplotlib.pyplot directly; the `sns.plt`
# alias used previously is not exposed by newer seaborn releases and
# raises AttributeError there.
plt.xticks(weight='bold', size=15, rotation=-90, color='blue')
plt.yticks(weight='bold', size=15, color='blue')
g.set_title('Population Growth Trend (1980 - 2010)', size=30)
g.set_ylabel('Population in Millions', size=22, weight='bold')
# The x axis shows geo locations here, not years, so label it accordingly.
g.set_xlabel('Geographic Region', size=22, weight='bold')
##
Out[89]:
<matplotlib.text.Text at 0x21c82a58>

No comments:

Post a Comment