import numpy as np
import pandas as pd
pd.options.display.max_rows = 3  ## keep DataFrame previews short


def remove_nan(x):
    """Drop all-NaN rows, then all-NaN columns, from DataFrame *x* in place.

    Returns None. Current pandas accepts only a single axis per
    ``dropna`` call, so the two axes are handled one after the other.
    """
    x.dropna(axis=0, how='all', inplace=True)
    x.dropna(axis=1, how='all', inplace=True)


def _load_worldbank_csv(path):
    """Read one CSV export and return it cleaned up for concatenation.

    The last 5 rows are discarded (presumably footer rows of the
    WorldBank.org export -- confirm against the files), a leading
    byte-order mark is stripped from the column labels so that the
    first column is always plain 'Country_Name', and empty rows and
    columns are removed.
    """
    frame = pd.read_csv(path)[:-5].copy()
    frame.columns = [str(col).lstrip('\ufeff') for col in frame.columns]
    remove_nan(frame)
    return frame


## Load dataframes; databases downloaded from WorldBank.org
df1 = _load_worldbank_csv('US_Employment.csv')
df2 = _load_worldbank_csv('US_Population.csv')
## Stack both tables row-wise, keeping only the columns they share
df0 = pd.concat([df1, df2], join='inner')
## Convert to a relational table: one row per year, one column per indicator
df0.drop(['Country_Name', 'Country_Code', 'Indicator_Code'], axis=1, inplace=True)
df0.set_index('Indicator_Name', inplace=True)
## Keep only the text before the first space of each remaining column label
## (assumed to be the year -- confirm against the CSV header)
df0.columns = [x.split(' ')[0] for x in df0.columns.values]
df0.columns.name = 'Year'
df0 = df0.T
df0  ## notebook preview

## Extract gender from columns
def _split_gender(label):
    """Return ``(label without its gender word, gender)`` for one indicator.

    'female' is tested before 'male' because any label containing
    'female' also contains the substring 'male'. Labels with neither
    word are tagged 'total' and have the word 'total' removed.
    """
    for word in ('female', 'male'):
        if word in label:
            return label.replace(word, ''), word
    return label.replace('total', ''), 'total'


df = df0.copy().T  ## back to one row per indicator so labels sit in the index
_label_pairs = [_split_gender(label) for label in df.index]
Indicator = [pair[0] for pair in _label_pairs]
gender = [pair[1] for pair in _label_pairs]
df['Gender'] = gender
df['Indicator'] = Indicator
## Two-level columns (Indicator, Gender), one row per year
df = df.set_index(['Indicator', 'Gender']).T
df  ## notebook preview

## Plot labor force by gender
## NOTE: `plt` is not imported in this file; it is assumed to be
## matplotlib.pyplot supplied by the notebook session -- confirm.
labor = df.stack()['Labor force, '].unstack() * 1e-6  ## scale to millions
remove_nan(labor)
## Turn the female count into a percentage of the total; male is the remainder
labor['female'] = labor['female'] / labor['total'] * 100
labor['male'] = 100 - labor['female']
labor = labor[['female', 'male', 'total']]
## Create the figure and axes explicitly and hand the axes to pandas.
## A bare plt.figure() followed by DataFrame.plot() leaves an empty
## figure behind, because DataFrame.plot() opens a figure of its own.
fig, ax = plt.subplots()
ax = labor.plot(ax=ax, style=['r.-', 'b.-', 'g.-'], ylim=[43, 56],
                secondary_y='total')
ax.set_title('Labor Force by Gender', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Percent', fontsize=12)
## 'total' is drawn against the secondary (right-hand) axis
ax.right_ax.set_ylabel('Count  (millions)', fontsize=12)
ax.annotate('% of female labor force is rising', xytext=(20,43.8), xy=(21.5,46)
            , fontsize=14, ha='right', bbox=dict(boxstyle='round',fc='gold',ec='k')
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.5",relpos=(1,0.5)))
plt.show()

## Notebook output: <matplotlib.figure.Figure at 0x9f7406c>

## Plot labor force by level of education
## NOTE: `plt` is not imported in this file; it is assumed to be
## matplotlib.pyplot supplied by the notebook session -- confirm.
empl_educ = df.stack()
## Select the 'Labor force with ...' indicators in one step instead of
## deleting the other columns while looping over them
educ_cols = [col for col in empl_educ.columns if 'Labor force with' in col]
## Short label: first word after the 'Labor force with ' prefix, capitalized
colnames = [col.replace('Labor force with ','').split(' ')[0].capitalize()
            for col in educ_cols]
empl_educ = empl_educ[educ_cols].copy()
## Rename before dropping empty rows/columns so the labels always line up
## with the columns they were derived from
empl_educ.columns = colnames
remove_nan(empl_educ)
empl_educ.columns.name = 'Education'
empl_educ = empl_educ.unstack()
ax = empl_educ['Primary'].plot(style=['r-','b-','g-'])
## Capture the legend entries now so the legend lists each gender only once
handles, labels = ax.get_legend_handles_labels()
empl_educ[['Secondary','Tertiary']].plot(ax=ax, style=['r-o','b-o','g-o','r-s','b-s','g-s'])
plt.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_title('Labor Force by Level of Education', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('% by Gender', fontsize=12)
## Text labels naming each group of curves (positions are hard-coded)
ax.annotate('Secondary', xytext=(0.5,48), xy=(1.5,51), ha='center', fontsize=12)
ax.annotate('Tertiary', xytext=(0.5,36), xy=(1.5,35), ha='center', fontsize=12)
ax.annotate('Primary', xytext=(0.5,18), xy=(1.5,15), ha='center', fontsize=12)
ax.text(2.5, 45, 'Secondary education is majority'
        , fontsize=14, ha='center', bbox=dict(boxstyle='round',fc='aqua',ec='k'))
ax.annotate('Tertiary education is rising', xytext=(2,27), xy=(3.5,34)
            , fontsize=14, ha='center', bbox=dict(boxstyle='round',fc='aqua',ec='k')
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.5",relpos=(1,0.5)))
plt.show()

## Plot unemployment by gender
## NOTE: `plt` is not imported in this file; it is assumed to be
## matplotlib.pyplot supplied by the notebook session -- confirm.
unempl = df.stack()['Unemployment,  (% of  labor force) (modeled ILO estimate)'].unstack()
remove_nan(unempl)
ax = unempl.plot(style=['r-','b-','g-'])
ax.set_title('Unemployment by Gender', fontsize=14, weight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('% by Gender', fontsize=12)
## Call legend through pyplot explicitly rather than through a bare name
## that only exists after a pylab star-import
plt.legend(loc='best')
## Three arrows fan out from one text box; the first two carry no text
textloc = (11,8.5)
ax.annotate('', xytext=textloc, xy=(12,6.5), arrowprops=dict(arrowstyle="->"), fontsize=14)
ax.annotate('', xytext=textloc, xy=(2,8), arrowprops=dict(arrowstyle="->"), fontsize=14)
ax.annotate('Recession\nCycles', xytext=textloc, xy=(17.5,10)
            , ha='center', va='center', fontsize=14
            , bbox=dict(boxstyle='round', fc='GreenYellow', ec='k')
            , arrowprops=dict(arrowstyle="->"))
plt.show()

## Analyze employment vs economy growth
## Employed labor force per gender:
##   gender share of the labor force (%) * total labor force * (1 - unemployment rate)
empl = pd.DataFrame({
    sex: labor[sex]/100 * labor['total'] * (1 - unempl[sex]/100)
    for sex in ('female', 'male')})
remove_nan(empl)
## Sum of both genders = total employed labor force
empl['total'] = empl.sum(axis=1)
## Year-over-year change in total employment = jobs created (+) or lost (-)
empl['Jobs'] = empl['total'].diff()
## Year-over-year change in GDP, scaled by 1e-12
empl['GDP Growth'] = df['GDP (current US$)'].diff() * 1e-12

## Plot employment vs GDP growth
## NOTE: `plt` is not imported in this file; it is assumed to be
## matplotlib.pyplot supplied by the notebook session -- confirm.
plt.figure()
ax1 = empl.Jobs.plot(style='g-o', ylim=[-6.5,6.5])
ax1.set_title('Employment vs. GDP Growth', fontsize=16, weight='bold')
ax1.set_xlabel('Year', fontsize=12)
ax1.set_ylabel('Jobs Created/Lost  (millions)', fontsize=12)
## pyplot functions are called through `plt` explicitly rather than through
## bare names that only exist after a pylab star-import
plt.legend(loc='upper left')
ax2 = empl['GDP Growth'].plot(secondary_y=True, style='r.-', ylim=[-1.1,1.1])
## Reach the secondary axis through ax1: pandas attaches `right_ax` to the
## primary axes, while the object returned by an all-secondary plot (ax2)
## may be either the primary or the secondary axes depending on the
## pandas version
ax1.right_ax.set_ylabel('GDP Growth  (trillions)', fontsize=12)
plt.legend(loc='upper right')
plt.axhline(0,color='k')
## Grey band across the full y-range between x positions 16 and 18
ax1.fill([16,16,18,18],[-6.5,6.5,6.5,-6.5], color='grey', alpha=0.5)
ax1.annotate('Great Recession\n(2007-09)', xytext=(9,-4), xy=(17,-3)
             , ha='center', va='center', fontsize=14
             , bbox=dict(boxstyle='round', fc='Lavender', ec='k')
             , arrowprops=dict(arrowstyle="->"))
## y=-10 lies below the y-limits set above, so this text sits under the axes
ax1.text(12.5, -10, 'Strong correlation between employment and GDP growth'
        , fontsize=14, ha='center', weight='bold', color='r')
plt.show()

## Get employment by sector
## NOTE: `plt` is not imported in this file; it is assumed to be
## matplotlib.pyplot supplied by the notebook session -- confirm.
empl_sect = df.stack()
## Select the 'Employees, ...' indicators in one step instead of deleting
## the other columns while looping over them
sect_cols = [col for col in empl_sect.columns if 'Employees' in col]
## Short label: text between 'Employees, ' and the next ', ', capitalized
colnames = [col.replace('Employees, ','').split(', ')[0].capitalize()
            for col in sect_cols]
empl_sect = empl_sect[sect_cols].copy()
## Rename before dropping empty rows/columns so the labels always line up
## with the columns they were derived from
empl_sect.columns = colnames
remove_nan(empl_sect)
empl_sect.columns.name = 'Sector'
## Order the sector columns by label, descending; .iloc replaces the
## removed .ix indexer for taking the first row by position
colsort = empl_sect.iloc[0].sort_index(ascending=False).index
empl_sect = empl_sect[colsort]
empl_sect = empl_sect.unstack()
## Plot employment by sector
ax = empl_sect.plot(style=['b-','b--','r-','r--','g-','g--'])
ax.set_title('Employment by Sector', fontsize=14, weight='bold')
ax.set_ylabel('% by Gender', fontsize=12)
## Explicit pyplot call instead of a bare pylab name
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.annotate('Majority employed in Services', xytext=(15,72), xy=(28,68)
            , fontsize=14, ha='center', bbox=dict(boxstyle='round',fc='gold',ec='k')
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=-0.3",relpos=(1,0.5)))
## Second arrow without text
ax.annotate('', xytext=(5,73), xy=(3,82), fontsize=14
            , arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=-0.3"))
plt.show()
## Bar plot
## One stacked bar chart per gender; the groupby plot call yields one axes
## object per group.
ax = empl_sect.stack().reset_index('Gender').groupby('Gender').plot(
        kind='bar', stacked=True, color='brg')
## Pick the axes by position with .iloc: the result is indexed by the group
## labels, and plain [0]/[1] on a label-indexed Series is deprecated.
## Assumes the groups come out as female first, male second -- confirm that
## no other gender label is present in the sector data.
female_ax = ax.iloc[0]
male_ax = ax.iloc[1]
female_ax.set_title('Female Employment', fontsize=14, weight='bold')
male_ax.set_title('Male Employment', fontsize=14, weight='bold')
female_ax.set_ylabel('Percent', fontsize=12)
male_ax.set_ylabel('Percent', fontsize=12)
plt.show()

Indicator_Name	Employees, agriculture, female (% of female employment)	Employees, agriculture, male (% of male employment)	Employees, industry, female (% of female employment)	Employees, industry, male (% of male employment)	Employees, services, female (% of female employment)	Employees, services, male (% of male employment)	Employers, female (% of employment)	Employers, male (% of employment)	Employers, total (% of employment)	Employment rate of persons aged 25-49 with a child under age 3 living in a household, female (%)	Employment rate of persons aged 25-49 with a child under age 3 living in a household, male (%)	Employment rate of persons aged 25-49 with no children living in the household, female (%)	Employment rate of persons aged 25-49 with no children living in the household, male (%)	Employment to population ratio, 15+, female (%) (modeled ILO estimate)	Employment to population ratio, 15+, female (%) (national estimate)	Employment to population ratio, 15+, male (%) (modeled ILO estimate)	Employment to population ratio, 15+, male (%) (national estimate)	Employment to population ratio, 15+, total (%) (modeled ILO estimate)	Employment to population ratio, 15+, total (%) (national estimate)	Employment to population ratio, ages 15-24, female (%) (modeled ILO estimate)
Year
1980	1.6	5.0	18.500000	39.799999	79.900002	55.200001	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...
1981	1.6	4.9	18.200001	39.299999	80.199997	55.700001	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...
1982	1.6	5.1	17.299999	37.400002	81.099998	57.500000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...
	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...

Indicator	Employees, agriculture, (% of employment)		Employees, industry, (% of employment)		Employees, services, (% of employment)		Employers, (% of employment)			Employment rate of persons aged 25-49 with a child under age 3 living in a household, (%)		Employment rate of persons aged 25-49 with no children living in the household, (%)		Employment to population ratio, 15+, (%) (modeled ILO estimate)	Employment to population ratio, 15+, (%) (national estimate)	Employment to population ratio, 15+, (%) (modeled ILO estimate)	Employment to population ratio, 15+, (%) (national estimate)	Employment to population ratio, 15+, (%) (modeled ILO estimate)	Employment to population ratio, 15+, (%) (national estimate)	Employment to population ratio, ages 15-24, (%) (modeled ILO estimate)
Gender	female	male	female	male	female	male	female	male	total	female	male	female	male	female	female	male	male	total	total	female
1980	1.6	5.0	18.500000	39.799999	79.900002	55.200001	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...
1981	1.6	4.9	18.200001	39.299999	80.199997	55.700001	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...
1982	1.6	5.1	17.299999	37.400002	81.099998	57.500000	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...
	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...

US Employment Demographics

Source Code in IPython Notebook: