120 lines
4.0 KiB
Python
120 lines
4.0 KiB
Python
|
|
# Import libraries
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
# Import data
|
|
# Read the raw customer table from disk.
dataset = pd.read_csv('data/Churn_Modelling.csv')

# Positional columns 3..12 are used as model inputs, column 13 as the label.
X, y = dataset.iloc[:, 3:13], dataset.iloc[:, 13]
|
|
|
|
# Encode categorical data and scale continuous data
|
|
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
|
from sklearn.compose import make_column_transformer
|
|
# One-hot encode the two categorical columns and standardise the remaining
# numeric columns. The encoded columns come first in the transformed output
# because their transformer is listed first.
preprocess = make_column_transformer(
    (OneHotEncoder(), ['Geography', 'Gender']),
    (StandardScaler(), ['CreditScore', 'Age', 'Tenure', 'Balance',
                        'NumOfProducts', 'HasCrCard', 'IsActiveMember',
                        'EstimatedSalary']))

# Split in train/test
# The raw rows are split *before* the preprocessing is fitted so that the
# scaler statistics and encoder categories are learned from the training rows
# only; fitting on the full table would leak test-set information.
y = y.values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit on the training split, then apply the already-fitted transformer to the
# test split.
X_train = preprocess.fit_transform(X_train)
X_test = preprocess.transform(X_test)

# Drop one dummy column per encoded feature (output indices 0 and 3) to avoid
# redundant one-hot columns.
# NOTE(review): index 3 is the first Gender column only if Geography has
# exactly three categories -- confirm against the data.
X_train = np.delete(X_train, [0, 3], 1)
X_test = np.delete(X_test, [0, 3], 1)

# Keep X as the fully preprocessed feature matrix (now produced with the
# train-fitted transformer) so its type and shape stay as before.
X = np.delete(preprocess.transform(X), [0, 3], 1)
|
|
|
|
# Part 2 - Now let's make the ANN!
|
|
|
|
# Importing the Keras libraries and packages
|
|
from keras.models import Sequential
|
|
from keras.layers import Dense
|
|
|
|
# Initialising the ANN
|
|
# Assemble the network in a single constructor call: two hidden ReLU layers of
# six units each (the first one also declares the 11 input features), followed
# by a one-unit sigmoid output layer.
classifier = Sequential([
    Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11),
    Dense(units=6, kernel_initializer='uniform', activation='relu'),
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'),
])

# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric.
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train on the training split in mini-batches of 10 rows for 100 epochs.
classifier.fit(X_train, y_train, batch_size=10, epochs=100)
|
|
|
|
# Part 3 - Making predictions and evaluating the model
|
|
|
|
# Predicting the Test set results
|
|
# Score the test split and threshold the network output at 0.5, which yields
# a boolean array of predicted labels.
y_pred = classifier.predict(X_test) > 0.5
|
|
|
|
# Predicting a single new observation
|
|
"""Predict if the customer with the following information will leave the bank:
|
|
Geography: France
|
|
Credit Score: 600
|
|
Gender: Male
|
|
Age: 40
|
|
Tenure: 3
|
|
Balance: 60000
|
|
Number of Products: 2
|
|
Has Credit Card: Yes
|
|
Is Active Member: Yes
|
|
Estimated Salary: 50000"""
|
|
# Describe the single customer as one record; the keys are the column names
# that the fitted column transformer selects by name.
Xnew = pd.DataFrame([{
    'CreditScore': 600,
    'Geography': 'France',
    'Gender': 'Male',
    'Age': 40,
    'Tenure': 3,
    'Balance': 60000,
    'NumOfProducts': 2,
    'HasCrCard': 1,
    'IsActiveMember': 1,
    'EstimatedSalary': 50000,
}])

# Apply the already-fitted preprocessing and drop the same two dummy columns
# (indices 0 and 3) that were removed from the training features.
Xnew = np.delete(preprocess.transform(Xnew), [0, 3], 1)

# Threshold the network output at 0.5 to obtain a boolean prediction.
new_prediction = classifier.predict(Xnew) > 0.5
|
|
|
|
# Making the Confusion Matrix
|
|
from sklearn.metrics import confusion_matrix
|
|
# Tabulate the thresholded test-set predictions against the true labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
|
|
|
|
# Model factory used by the cross-validation and grid-search steps below
|
|
def build_classifier(optimizer='adam', input_dim=11):
    """Build and compile the churn-prediction network.

    The network has two hidden ReLU layers of six units each and a single
    sigmoid output unit, and is compiled with binary cross-entropy loss and
    an accuracy metric.

    Args:
        optimizer: Optimizer passed to ``compile``.
        input_dim: Number of input features declared on the first layer.
            Defaults to 11, the value that was previously hard-coded.

    Returns:
        The compiled ``Sequential`` model.
    """
    classifier = Sequential()
    # Only the first layer declares the input width.
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=input_dim))
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier
|
|
|
|
# Evaluate
|
|
from keras.wrappers.scikit_learn import KerasClassifier
|
|
from sklearn.model_selection import cross_val_score
|
|
# Wrap the model factory so scikit-learn can treat it as an estimator.
classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)

# 10-fold cross-validation on the training split; n_jobs=-1 asks for all
# available workers.
accuracies = cross_val_score(classifier, X_train, y_train, cv=10, n_jobs=-1)

# Summarise the fold scores.
mean = np.mean(accuracies)
# NOTE: despite its name, `variance` holds the standard deviation of the scores.
variance = np.std(accuracies)
|
|
|
|
# Tune
|
|
from sklearn.model_selection import GridSearchCV
|
|
# Fresh wrapper without fixed training settings; they come from the grid below.
classifier = KerasClassifier(build_fn=build_classifier)

# Candidate values: 2 batch sizes x 2 epoch counts x 2 optimizers.
parameters = {
    'batch_size': [25, 32],
    'epochs': [100, 500],
    'optimizer': ['adam', 'rmsprop'],
}

# Exhaustive search over the grid, scored by accuracy with 10-fold CV.
grid_search = GridSearchCV(classifier, parameters, scoring='accuracy', cv=10)
grid_search = grid_search.fit(X_train, y_train)

# Best parameter combination and its mean cross-validated score.
best_parameters, best_accuracy = grid_search.best_params_, grid_search.best_score_
|