{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "___\n", "\n", " \n", "___" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Operations\n", "\n", "There are lots of operations with pandas that will be really useful to you, but don't fall into any distinct category. Let's show them here in this lecture:" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col1col2col3
01444abc
12555def
23666ghi
34444xyz
\n", "
" ], "text/plain": [ " col1 col2 col3\n", "0 1 444 abc\n", "1 2 555 def\n", "2 3 666 ghi\n", "3 4 444 xyz" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Info on Unique Values" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([444, 555, 666])" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['col2'].unique()" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['col2'].nunique()" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "444 2\n", "555 1\n", "666 1\n", "Name: col2, dtype: int64" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['col2'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Selecting Data" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Select from DataFrame using criteria from multiple columns\n", "newdf = df[(df['col1']>2) & (df['col2']==444)]" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col1col2col3
34444xyz
\n", "
" ], "text/plain": [ " col1 col2 col3\n", "3 4 444 xyz" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "newdf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Applying Functions" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def times2(x):\n", " return x*2" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 2\n", "1 4\n", "2 6\n", "3 8\n", "Name: col1, dtype: int64" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['col1'].apply(times2)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 3\n", "1 3\n", "2 3\n", "3 3\n", "Name: col3, dtype: int64" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['col3'].apply(len)" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "10" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['col1'].sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "** Permanently Removing a Column**" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [], "source": [ "del df['col1']" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col2col3
0444abc
1555def
2666ghi
3444xyz
\n", "
" ], "text/plain": [ " col2 col3\n", "0 444 abc\n", "1 555 def\n", "2 666 ghi\n", "3 444 xyz" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "** Get column and index names: **" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index(['col2', 'col3'], dtype='object')" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "RangeIndex(start=0, stop=4, step=1)" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "** Sorting and Ordering a DataFrame:**" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col2col3
0444abc
1555def
2666ghi
3444xyz
\n", "
" ], "text/plain": [ " col2 col3\n", "0 444 abc\n", "1 555 def\n", "2 666 ghi\n", "3 444 xyz" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col2col3
0444abc
3444xyz
1555def
2666ghi
\n", "
" ], "text/plain": [ " col2 col3\n", "0 444 abc\n", "3 444 xyz\n", "1 555 def\n", "2 666 ghi" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(by='col2') #inplace=False by default" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "** Find Null Values or Check for Null Values**" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col2col3
0FalseFalse
1FalseFalse
2FalseFalse
3FalseFalse
\n", "
" ], "text/plain": [ " col2 col3\n", "0 False False\n", "1 False False\n", "2 False False\n", "3 False False" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull()" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col2col3
0444abc
1555def
2666ghi
3444xyz
\n", "
" ], "text/plain": [ " col2 col3\n", "0 444 abc\n", "1 555 def\n", "2 666 ghi\n", "3 444 xyz" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop rows with NaN Values\n", "df.dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "** Filling in NaN values with something else: **" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col1col2col3
01.0NaNabc
12.0555.0def
23.0666.0ghi
3NaN444.0xyz
\n", "
" ], "text/plain": [ " col1 col2 col3\n", "0 1.0 NaN abc\n", "1 2.0 555.0 def\n", "2 3.0 666.0 ghi\n", "3 NaN 444.0 xyz" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({'col1':[1,2,3,np.nan],\n", " 'col2':[np.nan,555,666,444],\n", " 'col3':['abc','def','ghi','xyz']})\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col1col2col3
01FILLabc
12555def
23666ghi
3FILL444xyz
\n", "
" ], "text/plain": [ " col1 col2 col3\n", "0 1 FILL abc\n", "1 2 555 def\n", "2 3 666 ghi\n", "3 FILL 444 xyz" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.fillna('FILL')" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data = {'A':['foo','foo','foo','bar','bar','bar'],\n", " 'B':['one','one','two','two','one','one'],\n", " 'C':['x','y','x','y','x','y'],\n", " 'D':[1,3,2,5,4,1]}\n", "\n", "df = pd.DataFrame(data)" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
0fooonex1
1foooney3
2footwox2
3bartwoy5
4baronex4
5baroney1
\n", "
" ], "text/plain": [ " A B C D\n", "0 foo one x 1\n", "1 foo one y 3\n", "2 foo two x 2\n", "3 bar two y 5\n", "4 bar one x 4\n", "5 bar one y 1" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Cxy
AB
barone4.01.0
twoNaN5.0
fooone1.03.0
two2.0NaN
\n", "
" ], "text/plain": [ "C x y\n", "A B \n", "bar one 4.0 1.0\n", " two NaN 5.0\n", "foo one 1.0 3.0\n", " two 2.0 NaN" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.pivot_table(values='D',index=['A', 'B'],columns=['C'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Great Job!" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }