introduction-to-deep-learning/Le Deep Learning de A a Z/Annexe - Data_Preprocessing.../Categorical Data.ipynb

116 lines
2.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Importing the libraries\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Importing the dataset\n",
"dataset = pd.read_csv('Data.csv')\n",
"# Features: every column except the last one\n",
"X = dataset.iloc[:, :-1].values\n",
"# Target: the last column, selected relative to the end so it cannot overlap X\n",
"y = dataset.iloc[:, -1].values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Taking care of missing data\n",
"from sklearn.impute import SimpleImputer\n",
"# Replace NaN entries in columns 1 and 2 of X with the mean of their column\n",
"imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')\n",
"X[:, 1:3] = imputer.fit_transform(X[:, 1:3])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the current contents of the feature matrix X as the cell output\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Encoding categorical data\n",
"# LabelEncoder is imported here because the next cell uses it to encode y\n",
"from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
"from sklearn.compose import make_column_transformer\n",
"# One-hot encode column 0 of X; the remaining columns are passed through unchanged\n",
"preprocess = make_column_transformer(\n",
"    (OneHotEncoder(), [0]),\n",
"    remainder = \"passthrough\"\n",
")\n",
"X = preprocess.fit_transform(X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Encoding the dependent variable: learn the distinct labels in y, then\n",
"# replace each label with its integer code\n",
"labelencoder_y = LabelEncoder().fit(y)\n",
"y = labelencoder_y.transform(y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Encoding categorical data\n",
"# Encoding the Independent Variable\n",
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
"from sklearn.compose import make_column_transformer\n",
"# `categories` lists the expected category values per feature; it does not select\n",
"# columns, so OneHotEncoder(categories = [0]) fails on a multi-column X. Column 0 is\n",
"# selected with a column transformer instead, and OneHotEncoder takes the raw\n",
"# string labels directly (no LabelEncoder pass over X is needed).\n",
"# Only encode while column 0 still holds string labels, so running this cell after\n",
"# X has already been one-hot encoded does not encode the features a second time.\n",
"if any(isinstance(label, str) for label in X[:, 0]):\n",
"    onehotencoder = make_column_transformer(\n",
"        (OneHotEncoder(), [0]),\n",
"        remainder = 'passthrough',\n",
"        sparse_threshold = 0  # always return a dense array, as .toarray() did\n",
"    )\n",
"    X = onehotencoder.fit_transform(X)\n",
"# Encoding the Dependent Variable\n",
"labelencoder_y = LabelEncoder()\n",
"y = labelencoder_y.fit_transform(y)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}