{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Opérations\n", "\n", "Il y a beaucoup d'opérations avec pandas qui vous seront vraiment utiles, mais elles n'entraient dans aucune case. Montrons-les ici dans ce notebook:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col1 col2 col3\n", "0 1 444 abc\n", "1 2 555 def\n", "2 3 666 ghi\n", "3 4 444 xyz" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col1col2col3
01444abc
12555def
23666ghi
34444xyz
\n
" }, "metadata": {}, "execution_count": 1 } ], "source": [ "import pandas as pd\n", "df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Informations sur les valeurs uniques" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([444, 555, 666], dtype=int64)" ] }, "metadata": {}, "execution_count": 2 } ], "source": [ "df['col2'].unique()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "3" ] }, "metadata": {}, "execution_count": 3 } ], "source": [ "df['col2'].nunique()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "444 2\n", "555 1\n", "666 1\n", "Name: col2, dtype: int64" ] }, "metadata": {}, "execution_count": 4 } ], "source": [ "df['col2'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sélection des données" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Sélection à partir d'un DataFrame en utilisant des critères \n", "# de plusieurs colonnes\n", "newdf = df[(df['col1']>2) & (df['col2']==444)]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col1 col2 col3\n", "3 4 444 xyz" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col1col2col3
34444xyz
\n
" }, "metadata": {}, "execution_count": 6 } ], "source": [ "newdf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Appliquer des Fonctions" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def times2(x):\n", " return x*2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 2\n", "1 4\n", "2 6\n", "3 8\n", "Name: col1, dtype: int64" ] }, "metadata": {}, "execution_count": 8 } ], "source": [ "df['col1'].apply(times2)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 3\n", "1 3\n", "2 3\n", "3 3\n", "Name: col3, dtype: int64" ] }, "metadata": {}, "execution_count": 9 } ], "source": [ "df['col3'].apply(len)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "10" ] }, "metadata": {}, "execution_count": 10 } ], "source": [ "df['col1'].sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Suppression définitive d'une colonne**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "del df['col1']" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col2 col3\n", "0 444 abc\n", "1 555 def\n", "2 666 ghi\n", "3 444 xyz" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col2col3
0444abc
1555def
2666ghi
3444xyz
\n
" }, "metadata": {}, "execution_count": 12 } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Obtenir les noms des colonnes et des index :**" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index(['col2', 'col3'], dtype='object')" ] }, "metadata": {}, "execution_count": 13 } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "RangeIndex(start=0, stop=4, step=1)" ] }, "metadata": {}, "execution_count": 14 } ], "source": [ "df.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Trier un DataFrame :**" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col2 col3\n", "0 444 abc\n", "1 555 def\n", "2 666 ghi\n", "3 444 xyz" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col2col3
0444abc
1555def
2666ghi
3444xyz
\n
" }, "metadata": {}, "execution_count": 15 } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col2 col3\n", "0 444 abc\n", "3 444 xyz\n", "1 555 def\n", "2 666 ghi" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col2col3
0444abc
3444xyz
1555def
2666ghi
\n
" }, "metadata": {}, "execution_count": 16 } ], "source": [ "df.sort_values(by='col2') #inplace=False par défaut" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Recherche de valeurs nulles ou vérification des valeurs nulles**" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col2 col3\n", "0 False False\n", "1 False False\n", "2 False False\n", "3 False False" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col2col3
0FalseFalse
1FalseFalse
2FalseFalse
3FalseFalse
\n
" }, "metadata": {}, "execution_count": 17 } ], "source": [ "df.isnull()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col2 col3\n", "0 444 abc\n", "1 555 def\n", "2 666 ghi\n", "3 444 xyz" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col2col3
0444abc
1555def
2666ghi
3444xyz
\n
" }, "metadata": {}, "execution_count": 18 } ], "source": [ "# Supprimer les lignes contenant des valeurs NaN\n", "df.dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Remplir les valeurs NaN avec autre chose :**" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col1 col2 col3\n", "0 1.0 NaN abc\n", "1 2.0 555.0 def\n", "2 3.0 666.0 ghi\n", "3 NaN 444.0 xyz" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col1col2col3
01.0NaNabc
12.0555.0def
23.0666.0ghi
3NaN444.0xyz
\n
" }, "metadata": {}, "execution_count": 20 } ], "source": [ "df = pd.DataFrame({'col1':[1,2,3,np.nan],\n", " 'col2':[np.nan,555,666,444],\n", " 'col3':['abc','def','ghi','xyz']})\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col1 col2 col3\n", "0 1 FILL abc\n", "1 2 555 def\n", "2 3 666 ghi\n", "3 FILL 444 xyz" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
col1col2col3
01FILLabc
12555def
23666ghi
3FILL444xyz
\n
" }, "metadata": {}, "execution_count": 21 } ], "source": [ "df.fillna('FILL')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data = {'A':['foo','foo','foo','bar','bar','bar'],\n", " 'B':['one','one','two','two','one','one'],\n", " 'C':['x','y','x','y','x','y'],\n", " 'D':[1,3,2,5,4,1]}\n", "\n", "df = pd.DataFrame(data)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " A B C D\n", "0 foo one x 1\n", "1 foo one y 3\n", "2 foo two x 2\n", "3 bar two y 5\n", "4 bar one x 4\n", "5 bar one y 1" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ABCD
0fooonex1
1foooney3
2footwox2
3bartwoy5
4baronex4
5baroney1
\n
" }, "metadata": {}, "execution_count": 23 } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "C x y\n", "A B \n", "bar one 4.0 1.0\n", " two NaN 5.0\n", "foo one 1.0 3.0\n", " two 2.0 NaN" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Cxy
AB
barone4.01.0
twoNaN5.0
fooone1.03.0
two2.0NaN
\n
" }, "metadata": {}, "execution_count": 24 } ], "source": [ "df.pivot_table(values='D',index=['A', 'B'],columns=['C'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Bon travail!" ] } ], "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3.7.9 64-bit ('pyfinance': conda)", "metadata": { "interpreter": { "hash": "e89404a230d8800c54ad520c7b67d1bd9bb833a07b37dd3e521a178a3dc34904" } } }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9-final" } }, "nbformat": 4, "nbformat_minor": 1 }