US Employment Demographics

Source Code in IPython Notebook:

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
## Keep rendered DataFrames short: show at most 3 rows before eliding the rest
pd.set_option('display.max_rows', 3)
In [2]:
## Load dataframes; databases downloaded from WorldBank.org
## The last five rows of each file are discarded (presumably the export's
## trailing footer lines -- confirm against the raw CSVs).
df1, df2 = (pd.read_csv(path).iloc[:-5]
            for path in ('US_Employment.csv', 'US_Population.csv'))
## Delete empty rows and columns
def remove_nan(x):
    """Drop, in place, every row and then every column of ``x`` that is all-NaN.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame to clean.  It is modified in place.

    Returns
    -------
    None
        The cleaned data is left in ``x`` itself.
    """
    # dropna() no longer accepts a list of axes (removed in pandas 1.0), so the
    # two axes are handled by separate calls: rows first, then columns.
    x.dropna(axis=0, how='all', inplace=True)
    x.dropna(axis=1, how='all', inplace=True)
## Strip the all-NaN rows/columns from both inputs (in place)
remove_nan(df1)
remove_nan(df2)
## INNER JOIN dataframes: rows of both frames are stacked and only the
## columns present in both are kept
df0 = pd.concat([df1, df2], join='inner')
## Convert to a relational table: discard the descriptive columns, key the
## rows by indicator name, shorten every remaining column label to its first
## space-separated token, then transpose so those labels become the row index
df0 = (df0.drop(['Country_Name', 'Country_Code', 'Indicator_Code'], axis=1)
          .set_index('Indicator_Name'))
df0.columns = [label.split(' ')[0] for label in df0.columns.values]
df0.columns.name = 'Year'
df0 = df0.T
df0
Out[2]:
Indicator_Name Employees, agriculture, female (% of female employment) Employees, agriculture, male (% of male employment) Employees, industry, female (% of female employment) Employees, industry, male (% of male employment) Employees, services, female (% of female employment) Employees, services, male (% of male employment) Employers, female (% of employment) Employers, male (% of employment) Employers, total (% of employment) Employment rate of persons aged 25-49 with a child under age 3 living in a household, female (%) Employment rate of persons aged 25-49 with a child under age 3 living in a household, male (%) Employment rate of persons aged 25-49 with no children living in the household, female (%) Employment rate of persons aged 25-49 with no children living in the household, male (%) Employment to population ratio, 15+, female (%) (modeled ILO estimate) Employment to population ratio, 15+, female (%) (national estimate) Employment to population ratio, 15+, male (%) (modeled ILO estimate) Employment to population ratio, 15+, male (%) (national estimate) Employment to population ratio, 15+, total (%) (modeled ILO estimate) Employment to population ratio, 15+, total (%) (national estimate) Employment to population ratio, ages 15-24, female (%) (modeled ILO estimate)
Year
1980 1.6 5.0 18.500000 39.799999 79.900002 55.200001 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
1981 1.6 4.9 18.200001 39.299999 80.199997 55.700001 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
1982 1.6 5.1 17.299999 37.400002 81.099998 57.500000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

33 rows × 105 columns

In [3]:
## Extract gender from columns
def _split_gender(name):
    """Return ``(indicator, gender)`` for one indicator label.

    'female' is tested before 'male' because 'male' is a substring of
    'female'.  Every occurrence of the matched word is removed from the label;
    labels containing neither word are tagged 'total' and have every 'total'
    removed.  No whitespace is tidied up afterwards, so the resulting labels
    can contain doubled spaces.
    """
    for tag in ('female', 'male'):
        if tag in name:
            return name.replace(tag, ''), tag
    return name.replace('total', ''), 'total'

## Work on a copy with one row per indicator so the two new keys can be
## attached as columns, then turned into a two-level index
df = df0.copy().T
gender = [_split_gender(x)[1] for x in df.index]
Indicator = [_split_gender(x)[0] for x in df.index]
df['Gender'] = gender
df['Indicator'] = Indicator
## Back to one row per year, columns keyed by (Indicator, Gender)
df = df.set_index(['Indicator', 'Gender']).T
df
Out[3]:
Indicator Employees, agriculture, (% of employment) Employees, industry, (% of employment) Employees, services, (% of employment) Employers, (% of employment) Employment rate of persons aged 25-49 with a child under age 3 living in a household, (%) Employment rate of persons aged 25-49 with no children living in the household, (%) Employment to population ratio, 15+, (%) (modeled ILO estimate) Employment to population ratio, 15+, (%) (national estimate) Employment to population ratio, 15+, (%) (modeled ILO estimate) Employment to population ratio, 15+, (%) (national estimate) Employment to population ratio, 15+, (%) (modeled ILO estimate) Employment to population ratio, 15+, (%) (national estimate) Employment to population ratio, ages 15-24, (%) (modeled ILO estimate)
Gender female male female male female male female male total female male female male female female male male total total female
1980 1.6 5.0 18.500000 39.799999 79.900002 55.200001 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
1981 1.6 4.9 18.200001 39.299999 80.199997 55.700001 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
1982 1.6 5.1 17.299999 37.400002 81.099998 57.500000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

33 rows × 105 columns

In [4]:
## Plot labor force by gender
## Labor-force values per gender, scaled by 1e-6 (i.e. expressed in millions)
labor = (df.stack()['Labor force, ']
           .unstack() * 1e-6)
remove_nan(labor)
## 'female' becomes its percentage of 'total'; 'male' is the complement.
## 'total' itself stays as the scaled count.
labor['female'] = labor['female'] / labor['total'] * 100
labor['male'] = 100 - labor['female']
## Create the figure and axes explicitly and hand the axes to pandas.
## DataFrame.plot() opens a figure of its own when no `ax` is given, so a bare
## plt.figure() beforehand only leaves an extra, empty figure behind.
fig, ax = plt.subplots()
labor = labor[['female','male','total']]
## Percent shares go on the left axis; the 'total' count on a secondary right axis
ax = labor.plot(ax=ax, style=['r.-','b.-','g.-'], ylim=[43,56], secondary_y='total')
ax.set_title('Labor Force by Gender', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Percent', fontsize=12)
ax.right_ax.set_ylabel('Count  (millions)', fontsize=12)
ax.annotate('% of female labor force is rising', xytext=(20,43.8), xy=(21.5,46)
            , fontsize=14, ha='right', bbox=dict(boxstyle='round',fc='gold',ec='k')
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.5",relpos=(1,0.5)))
plt.show()
<matplotlib.figure.Figure at 0x9f7406c>
In [5]:
# Plot labor force by level of education
## One row per (year, gender), one column per indicator
empl_educ = df.stack()
## Keep only the 'Labor force with ...' indicators.  Selecting the wanted
## columns replaces the original delete-while-iterating loop.
empl_educ = empl_educ[[col for col in empl_educ.columns
                       if 'Labor force with' in col]].copy()
remove_nan(empl_educ)
## Build the short labels from the columns that survived remove_nan(), so the
## label list can never disagree in length with the frame.  Each label is the
## first word after 'Labor force with ', capitalized.
colnames = [col.replace('Labor force with ','').split(' ')[0].capitalize()
            for col in empl_educ.columns]
empl_educ.columns = colnames
## Fixed typo: this axis name was previously spelled 'Eduction'
empl_educ.columns.name = 'Education'
empl_educ = empl_educ.unstack()
## Plot 'Primary' first and capture its legend entries (one per gender) ...
ax = empl_educ['Primary'].plot(style=['r-','b-','g-'])
handles, labels = ax.get_legend_handles_labels()
## ... then add the other two levels on the same axes with marker styles
empl_educ[['Secondary','Tertiary']].plot(ax=ax, style=['r-o','b-o','g-o','r-s','b-s','g-s'])
## Re-create the legend from the captured entries only, placed outside the
## plot on the right.  Called on the axes itself: the bare `legend` name is
## only defined when the notebook runs under %pylab.
ax.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_title('Labor Force by Level of Education', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('% by Gender', fontsize=12)
## Text labels identify each group of lines by education level
ax.annotate('Secondary', xytext=(0.5,48), xy=(1.5,51), ha='center', fontsize=12)
ax.annotate('Tertiary', xytext=(0.5,36), xy=(1.5,35), ha='center', fontsize=12)
ax.annotate('Primary', xytext=(0.5,18), xy=(1.5,15), ha='center', fontsize=12)
ax.text(2.5, 45, 'Secondary education is majority'
        , fontsize=14, ha='center', bbox=dict(boxstyle='round',fc='aqua',ec='k'))
ax.annotate('Tertiary education is rising', xytext=(2,27), xy=(3.5,34)
            , fontsize=14, ha='center', bbox=dict(boxstyle='round',fc='aqua',ec='k')
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.5",relpos=(1,0.5)))
plt.show()
In [6]:
## Plot unemployment by gender
## The doubled spaces in the label are what the gender-extraction step leaves
## behind after removing 'female' / 'male' / 'total' from the indicator name.
unempl = df.stack()['Unemployment,  (% of  labor force) (modeled ILO estimate)'].unstack()
remove_nan(unempl)
ax = unempl.plot(style=['r-','b-','g-'])
ax.set_title('Unemployment by Gender', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('% by Gender', fontsize=12)
## Call legend() on the axes itself: the bare `legend` name is only defined
## when the notebook runs under %pylab.
ax.legend(loc='best')
## Three arrows fan out from one shared text box; the first two annotations
## carry no text and only draw their arrow.
textloc = (11,8.5)
ax.annotate('', xytext=textloc, xy=(12,6.5), arrowprops=dict(arrowstyle="->"), fontsize=14)
ax.annotate('', xytext=textloc, xy=(2,8), arrowprops=dict(arrowstyle="->"), fontsize=14)
ax.annotate('Recession\nCycles', xytext=textloc, xy=(17.5,10)
            , ha='center', va='center', fontsize=14
            , bbox=dict(boxstyle='round', fc='GreenYellow', ec='k')
            , arrowprops=dict(arrowstyle="->"))
plt.show()
In [7]:
## Analyze employment vs economy growth
## Number of employed labor force by gender: the gender's percentage share of
## the labor force, times the total labor force, times the fraction of that
## gender that is not unemployed
empl = pd.DataFrame(dict(
    (g, labor[g]/100 * labor.total * (1 - unempl[g]/100))
    for g in ('female', 'male')))
remove_nan(empl)
## Total number of employed labor force (row-wise sum of both genders)
empl['total'] = empl.sum(axis=1)
## Number of jobs created/lost: change in the total from the previous row
empl['Jobs'] = empl['total'].diff()
## GDP growth: row-to-row change in GDP, scaled by 1e-12
empl['GDP Growth'] = df['GDP (current US$)'].diff() * 1e-12
In [8]:
## Plot employment vs GDP growth
## All drawing goes through explicit axes objects: the bare `legend` /
## `axhline` names are only defined under %pylab, and they act on whichever
## axes happens to be current.
fig, ax1 = plt.subplots()
ax1 = empl.Jobs.plot(ax=ax1, style='g-o', ylim=[-6.5,6.5])
ax1.set_title('Employment vs. GDP Growth', fontsize=16, weight='bold')
ax1.set_xlabel('Year', fontsize=12)
ax1.set_ylabel('Jobs Created/Lost  (millions)', fontsize=12)
ax1.legend(loc='upper left')
ax2 = empl['GDP Growth'].plot(ax=ax1, secondary_y=True, style='r.-', ylim=[-1.1,1.1])
## Depending on the pandas version, Series.plot(secondary_y=True) returns
## either the left axes (which carries a `.right_ax` attribute) or the right
## axes itself; resolve the right-hand axes in both cases.
gdp_ax = getattr(ax2, 'right_ax', ax2)
gdp_ax.set_ylabel('GDP Growth  (trillions)', fontsize=12)
gdp_ax.legend(loc='upper right')
gdp_ax.axhline(0,color='k')
## Grey band spanning the full y-range between x = 16 and x = 18
ax1.fill([16,16,18,18],[-6.5,6.5,6.5,-6.5], color='grey', alpha=0.5)
ax1.annotate('Great Recession\n(2007-09)', xytext=(9,-4), xy=(17,-3)
             , ha='center', va='center', fontsize=14
             , bbox=dict(boxstyle='round', fc='Lavender', ec='k')
             , arrowprops=dict(arrowstyle="->"))
## y = -10 is below the ylim set above, so this text sits under the plot area
ax1.text(12.5, -10, 'Strong correlation between employment and GDP growth'
        , fontsize=14, ha='center', weight='bold', color='r')
plt.show()
In [9]:
## Get employment by sector
## One row per (year, gender), one column per indicator
empl_sect = df.stack()
## Keep only the 'Employees, <sector>, ...' indicators.  Selecting the wanted
## columns replaces the original delete-while-iterating loop.
empl_sect = empl_sect[[col for col in empl_sect.columns
                       if 'Employees' in col]].copy()
remove_nan(empl_sect)
## Build the short labels from the columns that survived remove_nan(), so the
## label list can never disagree in length with the frame.  Each label is the
## text between 'Employees, ' and the next ', ', capitalized.
colnames = [col.replace('Employees, ','').split(', ')[0].capitalize()
            for col in empl_sect.columns]
empl_sect.columns = colnames
empl_sect.columns.name = 'Sector'
## Order the columns by label, descending.  The first row is used only to
## obtain a Series indexed by the column labels; .iloc replaces the positional
## .ix lookup, which was removed in pandas 1.0.
colsort = empl_sect.iloc[0].sort_index(ascending=False).index
empl_sect = empl_sect[colsort]
empl_sect = empl_sect.unstack()
## Plot employment by sector
## One color per pair of lines; solid and dashed alternate within each pair
ax = empl_sect.plot(style=['b-','b--','r-','r--','g-','g--'])
ax.set_title('Employment by Sector', fontsize=14, weight='bold')
ax.set_ylabel('% by Gender', fontsize=12)
## Legend goes outside the plot on the right.  Called on the axes itself: the
## bare `legend` name is only defined when the notebook runs under %pylab.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.annotate('Majority employed in Services', xytext=(15,72), xy=(28,68)
            , fontsize=14, ha='center', bbox=dict(boxstyle='round',fc='gold',ec='k')
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=-0.3",relpos=(1,0.5)))
## Second arrow with no text of its own
ax.annotate('', xytext=(5,73), xy=(3,82), fontsize=14
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=-0.3"))
plt.show()
## Bar plot
## One stacked bar chart per gender.  groupby(...).plot() gives back one axes
## per group, keyed by the group label.
ax = empl_sect.stack().reset_index('Gender').groupby('Gender').plot(
        kind='bar', stacked=True, color='brg')
## Pick each axes by its gender label rather than by position: integer keys on
## a label-indexed result rely on a deprecated positional fallback, and labels
## tie each title to the matching chart.
ax['female'].set_title('Female Employment', fontsize=14, weight='bold')
ax['male'].set_title('Male Employment', fontsize=14, weight='bold')
ax['female'].set_ylabel('Percent', fontsize=12)
ax['male'].set_ylabel('Percent', fontsize=12)
plt.show()