116 lines
2.7 KiB
Plaintext
116 lines
2.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Importing the libraries\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Importing the dataset\n",
|
|
"dataset = pd.read_csv('Data.csv')\n",
|
|
"X = dataset.iloc[:, :-1].values\n",
|
|
"y = dataset.iloc[:, 3].values"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Taking care of missing data\n",
|
|
"from sklearn.impute import SimpleImputer\n",
|
|
"imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')\n",
|
|
"imputer.fit(X[:, 1:3])\n",
|
|
"X[:, 1:3] = imputer.transform(X[:, 1:3])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Encoding categorical data\n",
|
|
"from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
|
|
"from sklearn.compose import make_column_transformer\n",
|
|
"preprocess = make_column_transformer(\n",
|
|
" (OneHotEncoder(), [0]),\n",
|
|
" remainder = \"passthrough\"\n",
|
|
")\n",
|
|
"X = preprocess.fit_transform(X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"labelencoder_y = LabelEncoder()\n",
|
|
"y = labelencoder_y.fit_transform(y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Encoding categorical data (NOTE: repeats the encoding already applied to X and y in the cells above)\n",
|
|
"# Encoding the Independent Variable\n",
|
|
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
|
|
"labelencoder_X = LabelEncoder()\n",
|
|
"X[:, 0] = labelencoder_X.fit_transform(X[:, 0])\n",
|
|
"onehotencoder = OneHotEncoder(categories = [0])\n",
|
|
"X = onehotencoder.fit_transform(X).toarray()\n",
|
|
"# Encoding the Dependent Variable\n",
|
|
"labelencoder_y = LabelEncoder()\n",
|
|
"y = labelencoder_y.fit_transform(y)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.10"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|