39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
|
# K-Means
|
||
|
|
||
|
# Importer les librairies
|
||
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
import pandas as pd
|
||
|
|
||
|
# Importer le dataset
|
||
|
dataset = pd.read_csv('Mall_Customers.csv')
|
||
|
X = dataset.iloc[:, [3, 4]].values
|
||
|
|
||
|
# Utiliser la méthode elbow pour trouver le nombre optimal de clusters
|
||
|
from sklearn.cluster import KMeans
|
||
|
wcss = []
|
||
|
for i in range(1, 11):
|
||
|
kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 0)
|
||
|
kmeans.fit(X)
|
||
|
wcss.append(kmeans.inertia_)
|
||
|
plt.plot(range(1, 11), wcss)
|
||
|
plt.title('La méthode Elbow')
|
||
|
plt.xlabel('Nombre de clusters')
|
||
|
plt.ylabel('WCSS')
|
||
|
plt.show()
|
||
|
|
||
|
# Construction du modèle
|
||
|
from sklearn.cluster import KMeans
|
||
|
kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 0)
|
||
|
y_kmeans = kmeans.fit_predict(X)
|
||
|
|
||
|
# Visualiser les résultats
|
||
|
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], c = 'red', label = 'Cluster 1')
|
||
|
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], c = 'blue', label = 'Cluster 2')
|
||
|
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], c = 'green', label = 'Cluster 3')
|
||
|
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], c = 'cyan', label = 'Cluster 4')
|
||
|
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], c = 'magenta', label = 'Cluster 5')
|
||
|
plt.title('Clusters de clients')
|
||
|
plt.xlabel('Salaire annuel')
|
||
|
plt.ylabel('Spending Score')
|
||
|
plt.legend()
|