# Source listing metadata: 28 lines, 792 B, Python
# Data Preprocessing
#
# Loads Data.csv, mean-imputes missing values in feature columns 1 and 2,
# one-hot encodes feature column 0 and label-encodes the target column.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Importing the dataset
dataset = pd.read_csv('Data.csv')
# Features are every column except the last; the target is the last column.
# Using -1 (rather than a fixed index) keeps y in step with the X slice.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Taking care of missing data
# Replace NaN entries in feature columns 1 and 2 with the mean of that column.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])

# Encoding categorical data
# One-hot encode feature column 0; every other column is passed through as-is.
preprocess = make_column_transformer(
    (OneHotEncoder(), [0]),
    remainder="passthrough",
)
X = preprocess.fit_transform(X)
# Map the target labels to integer codes.
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)