2391 lines
58 KiB
Plaintext
2391 lines
58 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"___\n",
|
||
|
"\n",
|
||
|
"<a href='http://www.pieriandata.com'> <img src='../Pierian_Data_Logo.png' /></a>\n",
|
||
|
"___"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"source": [
|
||
|
"# DataFrames\n",
|
||
|
"\n",
|
||
|
"DataFrames are the workhorse of pandas and are directly inspired by the R programming language. We can think of a DataFrame as a bunch of Series objects put together to share the same index. Let's use pandas to explore this topic!"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 183,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import numpy as np"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 184,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from numpy.random import randn\n",
|
||
|
"np.random.seed(101)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 185,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df = pd.DataFrame(randn(5,4),index='A B C D E'.split(),columns='W X Y Z'.split())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 186,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 186,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Selection and Indexing\n",
|
||
|
"\n",
|
||
|
"Let's learn the various methods to grab data from a DataFrame"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 187,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"A 2.706850\n",
|
||
|
"B 0.651118\n",
|
||
|
"C -2.018168\n",
|
||
|
"D 0.188695\n",
|
||
|
"E 0.190794\n",
|
||
|
"Name: W, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 187,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df['W']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 188,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W Z\n",
|
||
|
"A 2.706850 0.503826\n",
|
||
|
"B 0.651118 0.605965\n",
|
||
|
"C -2.018168 -0.589001\n",
|
||
|
"D 0.188695 0.955057\n",
|
||
|
"E 0.190794 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 188,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Pass a list of column names\n",
|
||
|
"df[['W','Z']]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 189,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"A 2.706850\n",
|
||
|
"B 0.651118\n",
|
||
|
"C -2.018168\n",
|
||
|
"D 0.188695\n",
|
||
|
"E 0.190794\n",
|
||
|
"Name: W, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 189,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# SQL Syntax (NOT RECOMMENDED!)\n",
|
||
|
"df.W"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"DataFrame Columns are just Series"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 190,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"pandas.core.series.Series"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 190,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"type(df['W'])"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"**Creating a new column:**"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 191,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df['new'] = df['W'] + df['Y']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 192,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" <th>new</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" <td>3.614819</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" <td>-0.196959</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" <td>-1.489355</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" <td>-0.744542</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" <td>2.796762</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z new\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826 3.614819\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001 -1.489355\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509 2.796762"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 192,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"** Removing Columns**"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 193,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 193,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.drop('new',axis=1)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 194,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" <th>new</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" <td>3.614819</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" <td>-0.196959</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" <td>-1.489355</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" <td>-0.744542</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" <td>2.796762</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z new\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826 3.614819\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001 -1.489355\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509 2.796762"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 194,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Not inplace unless specified!\n",
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 195,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df.drop('new',axis=1,inplace=True)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 196,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 196,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Can also drop rows this way:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 197,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 197,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.drop('E',axis=0)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"** Selecting Rows**"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 198,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"W 2.706850\n",
|
||
|
"X 0.628133\n",
|
||
|
"Y 0.907969\n",
|
||
|
"Z 0.503826\n",
|
||
|
"Name: A, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 198,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.loc['A']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Or select based off of position instead of label "
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 199,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"W -2.018168\n",
|
||
|
"X 0.740122\n",
|
||
|
"Y 0.528813\n",
|
||
|
"Z -0.589001\n",
|
||
|
"Name: C, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 199,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.iloc[2]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"** Selecting subset of rows and columns **"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 200,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"-0.84807698340363147"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 200,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.loc['B','Y']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 201,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W Y\n",
|
||
|
"A 2.706850 0.907969\n",
|
||
|
"B 0.651118 -0.848077"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 201,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.loc[['A','B'],['W','Y']]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Conditional Selection\n",
|
||
|
"\n",
|
||
|
"An important feature of pandas is conditional selection using bracket notation, very similar to numpy:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 202,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 202,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 203,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A True True True True\n",
|
||
|
"B True False False True\n",
|
||
|
"C False True True False\n",
|
||
|
"D True False False True\n",
|
||
|
"E True True True True"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 203,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df>0"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 204,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 NaN NaN 0.605965\n",
|
||
|
"C NaN 0.740122 0.528813 NaN\n",
|
||
|
"D 0.188695 NaN NaN 0.955057\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 204,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df[df>0]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 205,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 205,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df[df['W']>0]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 206,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"A 0.907969\n",
|
||
|
"B -0.848077\n",
|
||
|
"D -0.933237\n",
|
||
|
"E 2.605967\n",
|
||
|
"Name: Y, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 206,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df[df['W']>0]['Y']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 207,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" Y X\n",
|
||
|
"A 0.907969 0.628133\n",
|
||
|
"B -0.848077 -0.319318\n",
|
||
|
"D -0.933237 -0.758872\n",
|
||
|
"E 2.605967 1.978757"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 207,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df[df['W']>0][['Y','X']]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"For two conditions you can use | and & with parenthesis:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 208,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 208,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df[(df['W']>0) & (df['Y'] > 1)]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## More Index Details\n",
|
||
|
"\n",
|
||
|
"Let's discuss some more features of indexing, including resetting the index or setting it something else. We'll also talk about index hierarchy!"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 209,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 209,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 210,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>index</th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>A</td>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>B</td>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>C</td>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>D</td>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>E</td>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" index W X Y Z\n",
|
||
|
"0 A 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"1 B 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"2 C -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"3 D 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"4 E 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 210,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Reset to default 0,1...n index\n",
|
||
|
"df.reset_index()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 211,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"newind = 'CA NY WY OR CO'.split()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 212,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df['States'] = newind"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 213,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" <th>States</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" <td>CA</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" <td>NY</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" <td>WY</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" <td>OR</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" <td>CO</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z States\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826 CA\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965 NY\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001 WY\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057 OR\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509 CO"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 213,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 214,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>States</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>CA</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>NY</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>WY</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>OR</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>CO</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"States \n",
|
||
|
"CA 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"NY 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"WY -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"OR 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"CO 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 214,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.set_index('States')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 215,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" <th>States</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" <td>CA</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" <td>NY</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" <td>WY</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>D</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" <td>OR</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>E</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" <td>CO</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z States\n",
|
||
|
"A 2.706850 0.628133 0.907969 0.503826 CA\n",
|
||
|
"B 0.651118 -0.319318 -0.848077 0.605965 NY\n",
|
||
|
"C -2.018168 0.740122 0.528813 -0.589001 WY\n",
|
||
|
"D 0.188695 -0.758872 -0.933237 0.955057 OR\n",
|
||
|
"E 0.190794 1.978757 2.605967 0.683509 CO"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 215,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 216,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df.set_index('States',inplace=True)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 218,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>W</th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" <th>Z</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>States</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>CA</th>\n",
|
||
|
" <td>2.706850</td>\n",
|
||
|
" <td>0.628133</td>\n",
|
||
|
" <td>0.907969</td>\n",
|
||
|
" <td>0.503826</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>NY</th>\n",
|
||
|
" <td>0.651118</td>\n",
|
||
|
" <td>-0.319318</td>\n",
|
||
|
" <td>-0.848077</td>\n",
|
||
|
" <td>0.605965</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>WY</th>\n",
|
||
|
" <td>-2.018168</td>\n",
|
||
|
" <td>0.740122</td>\n",
|
||
|
" <td>0.528813</td>\n",
|
||
|
" <td>-0.589001</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>OR</th>\n",
|
||
|
" <td>0.188695</td>\n",
|
||
|
" <td>-0.758872</td>\n",
|
||
|
" <td>-0.933237</td>\n",
|
||
|
" <td>0.955057</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>CO</th>\n",
|
||
|
" <td>0.190794</td>\n",
|
||
|
" <td>1.978757</td>\n",
|
||
|
" <td>2.605967</td>\n",
|
||
|
" <td>0.683509</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" W X Y Z\n",
|
||
|
"States \n",
|
||
|
"CA 2.706850 0.628133 0.907969 0.503826\n",
|
||
|
"NY 0.651118 -0.319318 -0.848077 0.605965\n",
|
||
|
"WY -2.018168 0.740122 0.528813 -0.589001\n",
|
||
|
"OR 0.188695 -0.758872 -0.933237 0.955057\n",
|
||
|
"CO 0.190794 1.978757 2.605967 0.683509"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 218,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Multi-Index and Index Hierarchy\n",
|
||
|
"\n",
|
||
|
"Let us go over how to work with Multi-Index, first we'll create a quick example of what a Multi-Indexed DataFrame would look like:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 253,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Index Levels\n",
|
||
|
"outside = ['G1','G1','G1','G2','G2','G2']\n",
|
||
|
"inside = [1,2,3,1,2,3]\n",
|
||
|
"hier_index = list(zip(outside,inside))\n",
|
||
|
"hier_index = pd.MultiIndex.from_tuples(hier_index)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 254,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],\n",
|
||
|
" labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 254,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"hier_index"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 257,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th rowspan=\"3\" valign=\"top\">G1</th>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.153661</td>\n",
|
||
|
" <td>0.167638</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>-0.765930</td>\n",
|
||
|
" <td>0.962299</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.902826</td>\n",
|
||
|
" <td>-0.537909</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th rowspan=\"3\" valign=\"top\">G2</th>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>-1.549671</td>\n",
|
||
|
" <td>0.435253</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1.259904</td>\n",
|
||
|
" <td>-0.447898</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.266207</td>\n",
|
||
|
" <td>0.412580</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" A B\n",
|
||
|
"G1 1 0.153661 0.167638\n",
|
||
|
" 2 -0.765930 0.962299\n",
|
||
|
" 3 0.902826 -0.537909\n",
|
||
|
"G2 1 -1.549671 0.435253\n",
|
||
|
" 2 1.259904 -0.447898\n",
|
||
|
" 3 0.266207 0.412580"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 257,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df = pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])\n",
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Now let's show how to index this! For index hierarchy we use df.loc[], if this was on the columns axis, you would just use normal bracket notation df[]. Calling one level of the index returns the sub-dataframe:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 260,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.153661</td>\n",
|
||
|
" <td>0.167638</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>-0.765930</td>\n",
|
||
|
" <td>0.962299</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.902826</td>\n",
|
||
|
" <td>-0.537909</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" A B\n",
|
||
|
"1 0.153661 0.167638\n",
|
||
|
"2 -0.765930 0.962299\n",
|
||
|
"3 0.902826 -0.537909"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 260,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.loc['G1']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 263,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"A 0.153661\n",
|
||
|
"B 0.167638\n",
|
||
|
"Name: 1, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 263,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.loc['G1'].loc[1]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 265,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"FrozenList([None, None])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 265,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.index.names"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 266,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df.index.names = ['Group','Num']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 267,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>Group</th>\n",
|
||
|
" <th>Num</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th rowspan=\"3\" valign=\"top\">G1</th>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.153661</td>\n",
|
||
|
" <td>0.167638</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>-0.765930</td>\n",
|
||
|
" <td>0.962299</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.902826</td>\n",
|
||
|
" <td>-0.537909</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th rowspan=\"3\" valign=\"top\">G2</th>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>-1.549671</td>\n",
|
||
|
" <td>0.435253</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1.259904</td>\n",
|
||
|
" <td>-0.447898</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.266207</td>\n",
|
||
|
" <td>0.412580</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" A B\n",
|
||
|
"Group Num \n",
|
||
|
"G1 1 0.153661 0.167638\n",
|
||
|
" 2 -0.765930 0.962299\n",
|
||
|
" 3 0.902826 -0.537909\n",
|
||
|
"G2 1 -1.549671 0.435253\n",
|
||
|
" 2 1.259904 -0.447898\n",
|
||
|
" 3 0.266207 0.412580"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 267,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 270,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>Num</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.153661</td>\n",
|
||
|
" <td>0.167638</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>-0.765930</td>\n",
|
||
|
" <td>0.962299</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.902826</td>\n",
|
||
|
" <td>-0.537909</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" A B\n",
|
||
|
"Num \n",
|
||
|
"1 0.153661 0.167638\n",
|
||
|
"2 -0.765930 0.962299\n",
|
||
|
"3 0.902826 -0.537909"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 270,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.xs('G1')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 271,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"A 0.153661\n",
|
||
|
"B 0.167638\n",
|
||
|
"Name: (G1, 1), dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 271,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.xs(['G1',1])"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 273,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>A</th>\n",
|
||
|
" <th>B</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>Group</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>G1</th>\n",
|
||
|
" <td>0.153661</td>\n",
|
||
|
" <td>0.167638</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>G2</th>\n",
|
||
|
" <td>-1.549671</td>\n",
|
||
|
" <td>0.435253</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" A B\n",
|
||
|
"Group \n",
|
||
|
"G1 0.153661 0.167638\n",
|
||
|
"G2 -1.549671 0.435253"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 273,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.xs(1,level='Num')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Great Job!"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.5.1"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 0
|
||
|
}
|