python-pour-finance/03-Pandas/.ipynb_checkpoints/07-Operations-checkpoint.ipynb

1071 lines
22 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"___\n",
"\n",
"<a href='http://www.pieriandata.com'> <img src='../Pierian_Data_Logo.png' /></a>\n",
"___"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Operations\n",
"\n",
"There are lots of operations with pandas that will be really useful to you, but don't fall into any distinct category. Let's show them here in this lecture:"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>444</td>\n",
" <td>abc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>555</td>\n",
" <td>def</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>666</td>\n",
" <td>ghi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>444</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col1 col2 col3\n",
"0 1 444 abc\n",
"1 2 555 def\n",
"2 3 666 ghi\n",
"3 4 444 xyz"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Info on Unique Values"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([444, 555, 666])"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['col2'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['col2'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"444 2\n",
"555 1\n",
"666 1\n",
"Name: col2, dtype: int64"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['col2'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Selecting Data"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Select from DataFrame using criteria from multiple columns\n",
"newdf = df[(df['col1']>2) & (df['col2']==444)]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>444</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col1 col2 col3\n",
"3 4 444 xyz"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"newdf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Applying Functions"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def times2(x):\n",
" return x*2"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 2\n",
"1 4\n",
"2 6\n",
"3 8\n",
"Name: col1, dtype: int64"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['col1'].apply(times2)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 3\n",
"1 3\n",
"2 3\n",
"3 3\n",
"Name: col3, dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['col3'].apply(len)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['col1'].sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Permanently Removing a Column**"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"del df['col1']"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>444</td>\n",
" <td>abc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>555</td>\n",
" <td>def</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>666</td>\n",
" <td>ghi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>444</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col2 col3\n",
"0 444 abc\n",
"1 555 def\n",
"2 666 ghi\n",
"3 444 xyz"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Get column and index names: **"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['col2', 'col3'], dtype='object')"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=4, step=1)"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Sorting and Ordering a DataFrame:**"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>444</td>\n",
" <td>abc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>555</td>\n",
" <td>def</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>666</td>\n",
" <td>ghi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>444</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col2 col3\n",
"0 444 abc\n",
"1 555 def\n",
"2 666 ghi\n",
"3 444 xyz"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>444</td>\n",
" <td>abc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>444</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>555</td>\n",
" <td>def</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>666</td>\n",
" <td>ghi</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col2 col3\n",
"0 444 abc\n",
"3 444 xyz\n",
"1 555 def\n",
"2 666 ghi"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(by='col2') #inplace=False by default"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Find Null Values or Check for Null Values**"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col2 col3\n",
"0 False False\n",
"1 False False\n",
"2 False False\n",
"3 False False"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isnull()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>444</td>\n",
" <td>abc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>555</td>\n",
" <td>def</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>666</td>\n",
" <td>ghi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>444</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col2 col3\n",
"0 444 abc\n",
"1 555 def\n",
"2 666 ghi\n",
"3 444 xyz"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop rows with NaN Values\n",
"df.dropna()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"** Filling in NaN values with something else: **"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>abc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2.0</td>\n",
" <td>555.0</td>\n",
" <td>def</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3.0</td>\n",
" <td>666.0</td>\n",
" <td>ghi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>444.0</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col1 col2 col3\n",
"0 1.0 NaN abc\n",
"1 2.0 555.0 def\n",
"2 3.0 666.0 ghi\n",
"3 NaN 444.0 xyz"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({'col1':[1,2,3,np.nan],\n",
" 'col2':[np.nan,555,666,444],\n",
" 'col3':['abc','def','ghi','xyz']})\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>FILL</td>\n",
" <td>abc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>555</td>\n",
" <td>def</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>666</td>\n",
" <td>ghi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>FILL</td>\n",
" <td>444</td>\n",
" <td>xyz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col1 col2 col3\n",
"0 1 FILL abc\n",
"1 2 555 def\n",
"2 3 666 ghi\n",
"3 FILL 444 xyz"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.fillna('FILL')"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = {'A':['foo','foo','foo','bar','bar','bar'],\n",
" 'B':['one','one','two','two','one','one'],\n",
" 'C':['x','y','x','y','x','y'],\n",
" 'D':[1,3,2,5,4,1]}\n",
"\n",
"df = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>x</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>y</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>foo</td>\n",
" <td>two</td>\n",
" <td>x</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bar</td>\n",
" <td>two</td>\n",
" <td>y</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>bar</td>\n",
" <td>one</td>\n",
" <td>x</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>bar</td>\n",
" <td>one</td>\n",
" <td>y</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"0 foo one x 1\n",
"1 foo one y 3\n",
"2 foo two x 2\n",
"3 bar two y 5\n",
"4 bar one x 4\n",
"5 bar one y 1"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">bar</th>\n",
" <th>one</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">foo</th>\n",
" <th>one</th>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"C x y\n",
"A B \n",
"bar one 4.0 1.0\n",
" two NaN 5.0\n",
"foo one 1.0 3.0\n",
" two 2.0 NaN"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(values='D',index=['A', 'B'],columns=['C'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Great Job!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}