python-pour-finance/03-Pandas/.ipynb_checkpoints/03-DataFrames-checkpoint.ipynb

2391 lines
58 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"___\n",
"\n",
"<a href='http://www.pieriandata.com'> <img src='../Pierian_Data_Logo.png' /></a>\n",
"___"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# DataFrames\n",
"\n",
"DataFrames are the workhorse of pandas and are directly inspired by the R programming language. We can think of a DataFrame as a bunch of Series objects put together to share the same index. Let's use pandas to explore this topic!"
]
},
{
"cell_type": "code",
"execution_count": 183,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from numpy.random import randn\n",
"np.random.seed(101)"
]
},
{
"cell_type": "code",
"execution_count": 185,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = pd.DataFrame(randn(5,4),index='A B C D E'.split(),columns='W X Y Z'.split())"
]
},
{
"cell_type": "code",
"execution_count": 186,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 186,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Selection and Indexing\n",
"\n",
"Let's learn the various methods to grab data from a DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 2.706850\n",
"B 0.651118\n",
"C -2.018168\n",
"D 0.188695\n",
"E 0.190794\n",
"Name: W, dtype: float64"
]
},
"execution_count": 187,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['W']"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W Z\n",
"A 2.706850 0.503826\n",
"B 0.651118 0.605965\n",
"C -2.018168 -0.589001\n",
"D 0.188695 0.955057\n",
"E 0.190794 0.683509"
]
},
"execution_count": 188,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pass a list of column names\n",
"df[['W','Z']]"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 2.706850\n",
"B 0.651118\n",
"C -2.018168\n",
"D 0.188695\n",
"E 0.190794\n",
"Name: W, dtype: float64"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# SQL Syntax (NOT RECOMMENDED!)\n",
"df.W"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"DataFrame Columns are just Series"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 190,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df['W'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Creating a new column:**"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df['new'] = df['W'] + df['Y']"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" <th>new</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" <td>3.614819</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" <td>-0.196959</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" <td>-1.489355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" <td>-0.744542</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" <td>2.796762</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z new\n",
"A 2.706850 0.628133 0.907969 0.503826 3.614819\n",
"B 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n",
"C -2.018168 0.740122 0.528813 -0.589001 -1.489355\n",
"D 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n",
"E 0.190794 1.978757 2.605967 0.683509 2.796762"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Removing Columns**"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 193,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop('new',axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 194,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" <th>new</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" <td>3.614819</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" <td>-0.196959</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" <td>-1.489355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" <td>-0.744542</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" <td>2.796762</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z new\n",
"A 2.706850 0.628133 0.907969 0.503826 3.614819\n",
"B 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n",
"C -2.018168 0.740122 0.528813 -0.589001 -1.489355\n",
"D 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n",
"E 0.190794 1.978757 2.605967 0.683509 2.796762"
]
},
"execution_count": 194,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Not inplace unless specified!\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.drop('new',axis=1,inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Can also drop rows this way:"
]
},
{
"cell_type": "code",
"execution_count": 197,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057"
]
},
"execution_count": 197,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop('E',axis=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Selecting Rows**"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"W 2.706850\n",
"X 0.628133\n",
"Y 0.907969\n",
"Z 0.503826\n",
"Name: A, dtype: float64"
]
},
"execution_count": 198,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['A']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or select based off of position instead of label "
]
},
{
"cell_type": "code",
"execution_count": 199,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"W -2.018168\n",
"X 0.740122\n",
"Y 0.528813\n",
"Z -0.589001\n",
"Name: C, dtype: float64"
]
},
"execution_count": 199,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Selecting subset of rows and columns **"
]
},
{
"cell_type": "code",
"execution_count": 200,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-0.84807698340363147"
]
},
"execution_count": 200,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['B','Y']"
]
},
{
"cell_type": "code",
"execution_count": 201,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>Y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.907969</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.848077</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W Y\n",
"A 2.706850 0.907969\n",
"B 0.651118 -0.848077"
]
},
"execution_count": 201,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[['A','B'],['W','Y']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Conditional Selection\n",
"\n",
"An important feature of pandas is conditional selection using bracket notation, very similar to numpy:"
]
},
{
"cell_type": "code",
"execution_count": 202,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 202,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 203,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A True True True True\n",
"B True False False True\n",
"C False True True False\n",
"D True False False True\n",
"E True True True True"
]
},
"execution_count": 203,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df>0"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>NaN</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 NaN NaN 0.605965\n",
"C NaN 0.740122 0.528813 NaN\n",
"D 0.188695 NaN NaN 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 204,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df>0]"
]
},
{
"cell_type": "code",
"execution_count": 205,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 205,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['W']>0]"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 0.907969\n",
"B -0.848077\n",
"D -0.933237\n",
"E 2.605967\n",
"Name: Y, dtype: float64"
]
},
"execution_count": 206,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['W']>0]['Y']"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Y</th>\n",
" <th>X</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>0.907969</td>\n",
" <td>0.628133</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>-0.848077</td>\n",
" <td>-0.319318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>-0.933237</td>\n",
" <td>-0.758872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>2.605967</td>\n",
" <td>1.978757</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Y X\n",
"A 0.907969 0.628133\n",
"B -0.848077 -0.319318\n",
"D -0.933237 -0.758872\n",
"E 2.605967 1.978757"
]
},
"execution_count": 207,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['W']>0][['Y','X']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For two conditions you can use | and & with parenthesis:"
]
},
{
"cell_type": "code",
"execution_count": 208,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 208,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df['W']>0) & (df['Y'] > 1)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## More Index Details\n",
"\n",
"Let's discuss some more features of indexing, including resetting the index or setting it something else. We'll also talk about index hierarchy!"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 209,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 210,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>B</td>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C</td>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>D</td>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>E</td>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index W X Y Z\n",
"0 A 2.706850 0.628133 0.907969 0.503826\n",
"1 B 0.651118 -0.319318 -0.848077 0.605965\n",
"2 C -2.018168 0.740122 0.528813 -0.589001\n",
"3 D 0.188695 -0.758872 -0.933237 0.955057\n",
"4 E 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 210,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reset to default 0,1...n index\n",
"df.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"newind = 'CA NY WY OR CO'.split()"
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df['States'] = newind"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" <th>States</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" <td>CA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" <td>WY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" <td>OR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" <td>CO</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z States\n",
"A 2.706850 0.628133 0.907969 0.503826 CA\n",
"B 0.651118 -0.319318 -0.848077 0.605965 NY\n",
"C -2.018168 0.740122 0.528813 -0.589001 WY\n",
"D 0.188695 -0.758872 -0.933237 0.955057 OR\n",
"E 0.190794 1.978757 2.605967 0.683509 CO"
]
},
"execution_count": 213,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 214,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>States</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>CA</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NY</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WY</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OR</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CO</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"States \n",
"CA 2.706850 0.628133 0.907969 0.503826\n",
"NY 0.651118 -0.319318 -0.848077 0.605965\n",
"WY -2.018168 0.740122 0.528813 -0.589001\n",
"OR 0.188695 -0.758872 -0.933237 0.955057\n",
"CO 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 214,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.set_index('States')"
]
},
{
"cell_type": "code",
"execution_count": 215,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" <th>States</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" <td>CA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" <td>NY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" <td>WY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" <td>OR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" <td>CO</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z States\n",
"A 2.706850 0.628133 0.907969 0.503826 CA\n",
"B 0.651118 -0.319318 -0.848077 0.605965 NY\n",
"C -2.018168 0.740122 0.528813 -0.589001 WY\n",
"D 0.188695 -0.758872 -0.933237 0.955057 OR\n",
"E 0.190794 1.978757 2.605967 0.683509 CO"
]
},
"execution_count": 215,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 216,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.set_index('States',inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 218,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>W</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>Z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>States</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>CA</th>\n",
" <td>2.706850</td>\n",
" <td>0.628133</td>\n",
" <td>0.907969</td>\n",
" <td>0.503826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NY</th>\n",
" <td>0.651118</td>\n",
" <td>-0.319318</td>\n",
" <td>-0.848077</td>\n",
" <td>0.605965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WY</th>\n",
" <td>-2.018168</td>\n",
" <td>0.740122</td>\n",
" <td>0.528813</td>\n",
" <td>-0.589001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OR</th>\n",
" <td>0.188695</td>\n",
" <td>-0.758872</td>\n",
" <td>-0.933237</td>\n",
" <td>0.955057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CO</th>\n",
" <td>0.190794</td>\n",
" <td>1.978757</td>\n",
" <td>2.605967</td>\n",
" <td>0.683509</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" W X Y Z\n",
"States \n",
"CA 2.706850 0.628133 0.907969 0.503826\n",
"NY 0.651118 -0.319318 -0.848077 0.605965\n",
"WY -2.018168 0.740122 0.528813 -0.589001\n",
"OR 0.188695 -0.758872 -0.933237 0.955057\n",
"CO 0.190794 1.978757 2.605967 0.683509"
]
},
"execution_count": 218,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Multi-Index and Index Hierarchy\n",
"\n",
"Let us go over how to work with Multi-Index, first we'll create a quick example of what a Multi-Indexed DataFrame would look like:"
]
},
{
"cell_type": "code",
"execution_count": 253,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Index Levels\n",
"outside = ['G1','G1','G1','G2','G2','G2']\n",
"inside = [1,2,3,1,2,3]\n",
"hier_index = list(zip(outside,inside))\n",
"hier_index = pd.MultiIndex.from_tuples(hier_index)"
]
},
{
"cell_type": "code",
"execution_count": 254,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],\n",
" labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])"
]
},
"execution_count": 254,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hier_index"
]
},
{
"cell_type": "code",
"execution_count": 257,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">G1</th>\n",
" <th>1</th>\n",
" <td>0.153661</td>\n",
" <td>0.167638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.765930</td>\n",
" <td>0.962299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.902826</td>\n",
" <td>-0.537909</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">G2</th>\n",
" <th>1</th>\n",
" <td>-1.549671</td>\n",
" <td>0.435253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.259904</td>\n",
" <td>-0.447898</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.266207</td>\n",
" <td>0.412580</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B\n",
"G1 1 0.153661 0.167638\n",
" 2 -0.765930 0.962299\n",
" 3 0.902826 -0.537909\n",
"G2 1 -1.549671 0.435253\n",
" 2 1.259904 -0.447898\n",
" 3 0.266207 0.412580"
]
},
"execution_count": 257,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's show how to index this! For index hierarchy we use df.loc[], if this was on the columns axis, you would just use normal bracket notation df[]. Calling one level of the index returns the sub-dataframe:"
]
},
{
"cell_type": "code",
"execution_count": 260,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.153661</td>\n",
" <td>0.167638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.765930</td>\n",
" <td>0.962299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.902826</td>\n",
" <td>-0.537909</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B\n",
"1 0.153661 0.167638\n",
"2 -0.765930 0.962299\n",
"3 0.902826 -0.537909"
]
},
"execution_count": 260,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['G1']"
]
},
{
"cell_type": "code",
"execution_count": 263,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 0.153661\n",
"B 0.167638\n",
"Name: 1, dtype: float64"
]
},
"execution_count": 263,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['G1'].loc[1]"
]
},
{
"cell_type": "code",
"execution_count": 265,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"FrozenList([None, None])"
]
},
"execution_count": 265,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index.names"
]
},
{
"cell_type": "code",
"execution_count": 266,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.index.names = ['Group','Num']"
]
},
{
"cell_type": "code",
"execution_count": 267,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Group</th>\n",
" <th>Num</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">G1</th>\n",
" <th>1</th>\n",
" <td>0.153661</td>\n",
" <td>0.167638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.765930</td>\n",
" <td>0.962299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.902826</td>\n",
" <td>-0.537909</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">G2</th>\n",
" <th>1</th>\n",
" <td>-1.549671</td>\n",
" <td>0.435253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.259904</td>\n",
" <td>-0.447898</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.266207</td>\n",
" <td>0.412580</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B\n",
"Group Num \n",
"G1 1 0.153661 0.167638\n",
" 2 -0.765930 0.962299\n",
" 3 0.902826 -0.537909\n",
"G2 1 -1.549671 0.435253\n",
" 2 1.259904 -0.447898\n",
" 3 0.266207 0.412580"
]
},
"execution_count": 267,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 270,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Num</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.153661</td>\n",
" <td>0.167638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.765930</td>\n",
" <td>0.962299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.902826</td>\n",
" <td>-0.537909</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B\n",
"Num \n",
"1 0.153661 0.167638\n",
"2 -0.765930 0.962299\n",
"3 0.902826 -0.537909"
]
},
"execution_count": 270,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.xs('G1')"
]
},
{
"cell_type": "code",
"execution_count": 271,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 0.153661\n",
"B 0.167638\n",
"Name: (G1, 1), dtype: float64"
]
},
"execution_count": 271,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.xs(['G1',1])"
]
},
{
"cell_type": "code",
"execution_count": 273,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Group</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>G1</th>\n",
" <td>0.153661</td>\n",
" <td>0.167638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G2</th>\n",
" <td>-1.549671</td>\n",
" <td>0.435253</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B\n",
"Group \n",
"G1 0.153661 0.167638\n",
"G2 -1.549671 0.435253"
]
},
"execution_count": 273,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.xs(1,level='Num')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Great Job!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}