import numpy as np
import pandas as pd
## Display at most 3 rows whenever a frame is echoed
# NOTE(review): `plt` is used later in this file but no import of it is
# visible here -- presumably the script was run under IPython's %pylab; confirm.
pd.set_option('display.max_rows', 3)

## Load dataframes; databases downloaded from WorldBank.org
# The last five rows of each CSV are discarded (presumably trailing
# footer/metadata rows of the export -- confirm against the files).
employment_csv, population_csv = 'US_Employment.csv', 'US_Population.csv'
df1 = pd.read_csv(employment_csv).iloc[:-5]
df2 = pd.read_csv(population_csv).iloc[:-5]
## Delete empty rows and columns
def remove_nan(x):
    """Drop, in place, every all-NaN row and then every all-NaN column of *x*.

    The two axes are handled by separate ``dropna`` calls because passing a
    list of axes (``axis=[0, 1]``) is no longer accepted by pandas.

    Args:
        x: the DataFrame to clean; it is modified in place.

    Returns:
        None, like any in-place pandas operation.
    """
    x.dropna(axis=0, how='all', inplace=True)
    x.dropna(axis=1, how='all', inplace=True)
## Strip all-NaN rows/columns from both inputs before combining them
for frame in (df1, df2):
    remove_nan(frame)

## INNER JOIN dataframes
# The rows of both frames are stacked; join='inner' keeps only the columns
# that the two frames have in common.
df0 = pd.concat([df1, df2], join='inner')

## Convert to a relational table
# Drop the identifier columns and key each row by its indicator name.
id_columns = ['Country_Name', 'Country_Code', 'Indicator_Code']
df0 = df0.drop(id_columns, axis=1).set_index('Indicator_Name')
# Keep only the first space-separated token of every remaining column label
# and call that axis 'Year'.
df0.columns = pd.Index([label.split(' ')[0] for label in df0.columns], name='Year')
# Transpose: one row per year, one column per indicator.
df0 = df0.T
# Bare expression: echoes the frame only as the last line of a notebook cell.
df0
## Extract gender from columns
# Work on a transposed copy so that each indicator name is a row label.
df = df0.copy().T
genders = []
indicators = []
for name in df.index:
    # 'female' is tested first because it contains 'male' as a substring;
    # names mentioning neither are labelled 'total'.
    tag = next((g for g in ('female', 'male') if g in name), 'total')
    genders.append(tag)
    # Every occurrence of the tag is removed; surrounding punctuation and
    # spaces are left untouched, so later lookups rely on the exact result.
    indicators.append(name.replace(tag, ''))
df['Gender'] = genders
df['Indicator'] = indicators
# Rows become (Indicator, Gender) pairs, then transpose back so the frame
# has two-level columns.
df = df.set_index(['Indicator', 'Gender']).T
# Bare expression: echoes the frame only as the last line of a notebook cell.
df
## Plot labor force by gender
# Select the 'Labor force, ' indicator with one column per gender, scaled by
# 1e-6 (the right-hand axis below is labelled in millions).
labor = df.stack()['Labor force, '].unstack() * 1e-6
remove_nan(labor)
# Re-express female as a percentage of the total, and male as the remainder.
labor['female'] = labor['female'] / labor['total'] * 100
labor['male'] = 100 - labor['female']
labor = labor[['female', 'male', 'total']]
# DataFrame.plot opens its own figure when no `ax` is given, so no separate
# plt.figure() call is made here (it would only leave an empty figure behind).
# 'total' is drawn against the secondary (right) y-axis.
ax = labor.plot(style=['r.-', 'b.-', 'g.-'], ylim=[43, 56], secondary_y='total')
ax.set_title('Labor Force by Gender', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Percent', fontsize=12)
ax.right_ax.set_ylabel('Count (millions)', fontsize=12)
ax.annotate(
    '% of female labor force is rising', xytext=(20, 43.8), xy=(21.5, 46),
    fontsize=14, ha='right', bbox=dict(boxstyle='round', fc='gold', ec='k'),
    arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0.5", relpos=(1, 0.5)))
plt.show()
## Plot labor force by level of education
empl_educ = df.stack()
# Keep only the 'Labor force with ...' indicators. A boolean mask is used
# instead of deleting columns while looping over them.
is_educ = ['Labor force with' in col for col in empl_educ.columns]
empl_educ = empl_educ.loc[:, is_educ].copy()
remove_nan(empl_educ)
# Shorten the names only after the NaN clean-up, so the number of new names
# always matches the columns that survived it. The short name is the first
# word after the 'Labor force with ' prefix, capitalized.
colnames = [col.replace('Labor force with ', '').split(' ')[0].capitalize()
            for col in empl_educ.columns]
empl_educ.columns = colnames
empl_educ.columns.name = 'Education'
empl_educ = empl_educ.unstack()
ax = empl_educ['Primary'].plot(style=['r-', 'b-', 'g-'])
# Capture the legend entries now, while only the Primary lines exist, so the
# final legend does not also list every Secondary/Tertiary line.
handles, labels = ax.get_legend_handles_labels()
empl_educ[['Secondary', 'Tertiary']].plot(
    ax=ax, style=['r-o', 'b-o', 'g-o', 'r-s', 'b-s', 'g-s'])
# Call legend on the axes itself rather than through a pylab-injected global.
ax.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_title('Labor Force by Level of Education', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('% by Gender', fontsize=12)
ax.annotate('Secondary', xytext=(0.5, 48), xy=(1.5, 51), ha='center', fontsize=12)
ax.annotate('Tertiary', xytext=(0.5, 36), xy=(1.5, 35), ha='center', fontsize=12)
ax.annotate('Primary', xytext=(0.5, 18), xy=(1.5, 15), ha='center', fontsize=12)
ax.text(
    2.5, 45, 'Secondary education is majority',
    fontsize=14, ha='center', bbox=dict(boxstyle='round', fc='aqua', ec='k'))
ax.annotate(
    'Tertiary education is rising', xytext=(2, 27), xy=(3.5, 34),
    fontsize=14, ha='center', bbox=dict(boxstyle='round', fc='aqua', ec='k'),
    arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0.5", relpos=(1, 0.5)))
plt.show()
## Plot unemployment by gender
# Select the unemployment indicator with one column per gender.
unempl = df.stack()['Unemployment, (% of labor force) (modeled ILO estimate)'].unstack()
remove_nan(unempl)
ax = unempl.plot(style=['r-', 'b-', 'g-'])
ax.set_title('Unemployment by Gender', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('% by Gender', fontsize=12)
# Call legend on the axes itself rather than through a pylab-injected global.
ax.legend(loc='best')
# Three arrows fan out from one text box: the first two annotations carry no
# text and only add an arrow from the shared text position.
textloc = (11, 8.5)
ax.annotate('', xytext=textloc, xy=(12, 6.5), arrowprops=dict(arrowstyle="->"), fontsize=14)
ax.annotate('', xytext=textloc, xy=(2, 8), arrowprops=dict(arrowstyle="->"), fontsize=14)
ax.annotate(
    'Recession\nCycles', xytext=textloc, xy=(17.5, 10),
    ha='center', va='center', fontsize=14,
    bbox=dict(boxstyle='round', fc='GreenYellow', ec='k'),
    arrowprops=dict(arrowstyle="->"))
plt.show()
## Analyze employment vs economy growth
# Number of employed labor force by gender:
#   gender share (%) / 100 * total labor force * (1 - unemployment rate / 100)
empl = pd.DataFrame({
    'female': labor['female'] / 100 * labor['total'] * (1 - unempl['female'] / 100),
    'male': labor['male'] / 100 * labor['total'] * (1 - unempl['male'] / 100),
})
remove_nan(empl)
empl['total'] = empl.sum(axis=1)  ## total number of employed labor force
empl['Jobs'] = empl['total'].diff()  ## number of jobs created/lost
empl['GDP Growth'] = df['GDP (current US$)'].diff() * 1e-12  ## GDP growth
## Plot employment vs GDP growth
plt.figure()
ax1 = empl['Jobs'].plot(style='g-o', ylim=[-6.5, 6.5])
ax1.set_title('Employment vs. GDP Growth', fontsize=16, weight='bold')
ax1.set_xlabel('Year', fontsize=12)
ax1.set_ylabel('Jobs Created/Lost (millions)', fontsize=12)
# Legend for the jobs line, attached explicitly to the left-hand axes.
ax1.legend(loc='upper left')
ax2 = empl['GDP Growth'].plot(secondary_y=True, style='r.-', ylim=[-1.1, 1.1])
# Depending on the pandas version, plotting a Series with secondary_y=True
# returns either the left axes (exposing .right_ax) or the right axes itself;
# resolve the right-hand axes in both cases.
right_ax = getattr(ax2, 'right_ax', ax2)
right_ax.set_ylabel('GDP Growth (trillions)', fontsize=12)
right_ax.legend(loc='upper right')
# Both y-ranges are symmetric around zero, so this line marks zero for both.
right_ax.axhline(0, color='k')
# Shade the x-range 16..18 over the full height of the jobs axis.
ax1.fill([16, 16, 18, 18], [-6.5, 6.5, 6.5, -6.5], color='grey', alpha=0.5)
ax1.annotate(
    'Great Recession\n(2007-09)', xytext=(9, -4), xy=(17, -3),
    ha='center', va='center', fontsize=14,
    bbox=dict(boxstyle='round', fc='Lavender', ec='k'),
    arrowprops=dict(arrowstyle="->"))
# y=-10 lies below the visible y-range, so the text sits under the plot area.
ax1.text(
    12.5, -10, 'Strong correlation between employment and GDP growth',
    fontsize=14, ha='center', weight='bold', color='r')
plt.show()
## Get employment by sector
empl_sect = df.stack()
# Keep only the 'Employees ...' indicators. A boolean mask is used instead of
# deleting columns while looping over them.
is_sector = ['Employees' in col for col in empl_sect.columns]
empl_sect = empl_sect.loc[:, is_sector].copy()
remove_nan(empl_sect)
# Shorten the names only after the NaN clean-up, so the number of new names
# always matches the columns that survived it. The short name is the text
# between the 'Employees, ' prefix and the next ', ', capitalized.
colnames = [col.replace('Employees, ', '').split(', ')[0].capitalize()
            for col in empl_sect.columns]
empl_sect.columns = colnames
empl_sect.columns.name = 'Sector'
# Order the sector columns by name, descending. Only the labels of the first
# row are used, so its values do not matter; .iloc replaces the removed .ix.
colsort = empl_sect.iloc[0].sort_index(ascending=False).index
empl_sect = empl_sect[colsort]
empl_sect = empl_sect.unstack()
## Plot employment by sector
# Six styles: one colour per pair of columns, solid then dashed within a pair.
ax = empl_sect.plot(style=['b-', 'b--', 'r-', 'r--', 'g-', 'g--'])
ax.set_title('Employment by Sector', fontsize=14, weight='bold')
ax.set_ylabel('% by Gender', fontsize=12)
# Call legend on the axes itself rather than through a pylab-injected global;
# the anchor places it outside the plot, to the right.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.annotate(
    'Majority employed in Services', xytext=(15, 72), xy=(28, 68),
    fontsize=14, ha='center', bbox=dict(boxstyle='round', fc='gold', ec='k'),
    arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.3", relpos=(1, 0.5)))
# Second arrow without text of its own.
ax.annotate(
    '', xytext=(5, 73), xy=(3, 82), fontsize=14,
    arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.3"))
plt.show()
## Bar plot
# Move Gender back from the columns to a regular column, then draw one
# stacked bar chart per gender group.
ax = (empl_sect.stack()
               .reset_index('Gender')
               .groupby('Gender')
               .plot(kind='bar', stacked=True, color='brg'))
# The result is labelled by group, so pick the first and second plots by
# position with .iloc; plain ax[0] on a string-labelled Series is a
# deprecated positional lookup. Groups are presumably ordered female, male,
# as the titles below assume -- confirm.
female_ax, male_ax = ax.iloc[0], ax.iloc[1]
female_ax.set_title('Female Employment', fontsize=14, weight='bold')
male_ax.set_title('Male Employment', fontsize=14, weight='bold')
female_ax.set_ylabel('Percent', fontsize=12)
male_ax.set_ylabel('Percent', fontsize=12)
plt.show()