commit 4a61477f6e8b289ea2eb49b936176012894a4210 Author: paul-corbalan Date: Mon Aug 21 17:11:21 2023 +0200 Import diff --git a/Partie 1 - Data Preprocessing/Data.csv b/Partie 1 - Data Preprocessing/Data.csv new file mode 100644 index 0000000..12b7dc3 --- /dev/null +++ b/Partie 1 - Data Preprocessing/Data.csv @@ -0,0 +1,11 @@ +Country,Age,Salary,Purchased +France,44,72000,No +Spain,27,48000,Yes +Germany,30,54000,No +Spain,38,61000,No +Germany,40,,Yes +France,35,58000,Yes +Spain,,52000,No +France,48,79000,Yes +Germany,50,83000,No +France,37,67000,Yes \ No newline at end of file diff --git a/Partie 1 - Data Preprocessing/data_preprocessing.py b/Partie 1 - Data Preprocessing/data_preprocessing.py new file mode 100644 index 0000000..1930926 --- /dev/null +++ b/Partie 1 - Data Preprocessing/data_preprocessing.py @@ -0,0 +1,36 @@ +# Data Preprocessing + +# Import the libraries +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# Import the dataset +dataset = pd.read_csv('Data.csv') +X = dataset.iloc[:, :-1].values +y = dataset.iloc[:, -1].values + +# Handle missing data: mean-fill columns 1 and 2 (SimpleImputer replaces Imputer, removed in scikit-learn 0.22) +from sklearn.impute import SimpleImputer +imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean') +imputer.fit(X[:, 1:3]) +X[:, 1:3] = imputer.transform(X[:, 1:3]) + +# Encode categorical variables +# (OneHotEncoder lost categorical_features in scikit-learn 0.22; ColumnTransformer selects column 0 instead) +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import LabelEncoder, OneHotEncoder +onehotencoder = ColumnTransformer([('country', OneHotEncoder(), [0])], remainder = 'passthrough', sparse_threshold = 0) +X = np.array(onehotencoder.fit_transform(X), dtype = float) +labelencoder_y = LabelEncoder() +y = labelencoder_y.fit_transform(y) + +# Split the dataset into the Training set and the Test set +from sklearn.model_selection import train_test_split +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) + +# Feature Scaling +from sklearn.preprocessing import StandardScaler +sc = StandardScaler() +X_train = sc.fit_transform(X_train)
+X_test = sc.transform(X_test) \ No newline at end of file diff --git a/Partie 2 - Régression/50_Startups.csv b/Partie 2 - Régression/50_Startups.csv new file mode 100644 index 0000000..b1cc5f2 --- /dev/null +++ b/Partie 2 - Régression/50_Startups.csv @@ -0,0 +1,51 @@ +R&D Spend,Administration,Marketing Spend,State,Profit +165349.2,136897.8,471784.1,New York,192261.83 +162597.7,151377.59,443898.53,California,191792.06 +153441.51,101145.55,407934.54,Florida,191050.39 +144372.41,118671.85,383199.62,New York,182901.99 +142107.34,91391.77,366168.42,Florida,166187.94 +131876.9,99814.71,362861.36,New York,156991.12 +134615.46,147198.87,127716.82,California,156122.51 +130298.13,145530.06,323876.68,Florida,155752.6 +120542.52,148718.95,311613.29,New York,152211.77 +123334.88,108679.17,304981.62,California,149759.96 +101913.08,110594.11,229160.95,Florida,146121.95 +100671.96,91790.61,249744.55,California,144259.4 +93863.75,127320.38,249839.44,Florida,141585.52 +91992.39,135495.07,252664.93,California,134307.35 +119943.24,156547.42,256512.92,Florida,132602.65 +114523.61,122616.84,261776.23,New York,129917.04 +78013.11,121597.55,264346.06,California,126992.93 +94657.16,145077.58,282574.31,New York,125370.37 +91749.16,114175.79,294919.57,Florida,124266.9 +86419.7,153514.11,0,New York,122776.86 +76253.86,113867.3,298664.47,California,118474.03 +78389.47,153773.43,299737.29,New York,111313.02 +73994.56,122782.75,303319.26,Florida,110352.25 +67532.53,105751.03,304768.73,Florida,108733.99 +77044.01,99281.34,140574.81,New York,108552.04 +64664.71,139553.16,137962.62,California,107404.34 +75328.87,144135.98,134050.07,Florida,105733.54 +72107.6,127864.55,353183.81,New York,105008.31 +66051.52,182645.56,118148.2,Florida,103282.38 +65605.48,153032.06,107138.38,New York,101004.64 +61994.48,115641.28,91131.24,Florida,99937.59 +61136.38,152701.92,88218.23,New York,97483.56 +63408.86,129219.61,46085.25,California,97427.84 +55493.95,103057.49,214634.81,Florida,96778.92 
+46426.07,157693.92,210797.67,California,96712.8 +46014.02,85047.44,205517.64,New York,96479.51 +28663.76,127056.21,201126.82,Florida,90708.19 +44069.95,51283.14,197029.42,California,89949.14 +20229.59,65947.93,185265.1,New York,81229.06 +38558.51,82982.09,174999.3,California,81005.76 +28754.33,118546.05,172795.67,California,78239.91 +27892.92,84710.77,164470.71,Florida,77798.83 +23640.93,96189.63,148001.11,California,71498.49 +15505.73,127382.3,35534.17,New York,69758.98 +22177.74,154806.14,28334.72,California,65200.33 +1000.23,124153.04,1903.93,New York,64926.08 +1315.46,115816.21,297114.46,Florida,49490.75 +0,135426.92,0,California,42559.73 +542.05,51743.15,0,New York,35673.41 +0,116983.8,45173.06,California,14681.4 \ No newline at end of file diff --git a/Partie 2 - Régression/Position_Salaries.csv b/Partie 2 - Régression/Position_Salaries.csv new file mode 100644 index 0000000..0c752c7 --- /dev/null +++ b/Partie 2 - Régression/Position_Salaries.csv @@ -0,0 +1,11 @@ +Position,Level,Salary +Business Analyst,1,45000 +Junior Consultant,2,50000 +Senior Consultant,3,60000 +Manager,4,80000 +Country Manager,5,110000 +Region Manager,6,150000 +Partner,7,200000 +Senior Partner,8,300000 +C-level,9,500000 +CEO,10,1000000 \ No newline at end of file diff --git a/Partie 2 - Régression/Salary_Data.csv b/Partie 2 - Régression/Salary_Data.csv new file mode 100644 index 0000000..7945ad6 --- /dev/null +++ b/Partie 2 - Régression/Salary_Data.csv @@ -0,0 +1,31 @@ +YearsExperience,Salary +1.1,39343.00 +1.3,46205.00 +1.5,37731.00 +2.0,43525.00 +2.2,39891.00 +2.9,56642.00 +3.0,60150.00 +3.2,54445.00 +3.2,64445.00 +3.7,57189.00 +3.9,63218.00 +4.0,55794.00 +4.0,56957.00 +4.1,57081.00 +4.5,61111.00 +4.9,67938.00 +5.1,66029.00 +5.3,83088.00 +5.9,81363.00 +6.0,93940.00 +6.8,91738.00 +7.1,98273.00 +7.9,101302.00 +8.2,113812.00 +8.7,109431.00 +9.0,105582.00 +9.5,116969.00 +9.6,112635.00 +10.3,122391.00 +10.5,121872.00 diff --git a/Partie 2 - Régression/regression_lineaire_multiple.py 
b/Partie 2 - Régression/regression_lineaire_multiple.py new file mode 100644 index 0000000..e0940ee --- /dev/null +++ b/Partie 2 - Régression/regression_lineaire_multiple.py @@ -0,0 +1,32 @@ +# Multiple Linear Regression + +# Import the libraries +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# Import the dataset +dataset = pd.read_csv('50_Startups.csv') +X = dataset.iloc[:, :-1].values +y = dataset.iloc[:, -1].values + +# Encode categorical variables +# (OneHotEncoder lost categorical_features in scikit-learn 0.22; ColumnTransformer selects column 3 instead) +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import LabelEncoder, OneHotEncoder +onehotencoder = ColumnTransformer([('state', OneHotEncoder(), [3])], remainder = 'passthrough', sparse_threshold = 0) +X = np.array(onehotencoder.fit_transform(X), dtype = float) +X = X[:, 1:] # drop the first one-hot column + +# Split the dataset into the Training set and the Test set +from sklearn.model_selection import train_test_split +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) + +# Build the model +from sklearn.linear_model import LinearRegression +regressor = LinearRegression() +regressor.fit(X_train, y_train) + +# Make new predictions +y_pred = regressor.predict(X_test) +regressor.predict(np.array([[1, 0, 130000, 140000, 300000]])) \ No newline at end of file diff --git a/Partie 2 - Régression/regression_lineaire_simple.py b/Partie 2 - Régression/regression_lineaire_simple.py new file mode 100644 index 0000000..4f59196 --- /dev/null +++ b/Partie 2 - Régression/regression_lineaire_simple.py @@ -0,0 +1,32 @@ +# Simple Linear Regression + +# Import the libraries +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# Import the dataset +dataset = pd.read_csv('Salary_Data.csv') +X = dataset.iloc[:, :-1].values +y = dataset.iloc[:, -1].values + +# Split the dataset into the Training set and the Test set +from sklearn.model_selection import train_test_split +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1.0/3, random_state
= 0) + +# Build the model +from sklearn.linear_model import LinearRegression +regressor = LinearRegression() +regressor.fit(X_train, y_train) + +# Make new predictions +y_pred = regressor.predict(X_test) +regressor.predict(np.array([[15]])) # predict() expects a 2D (n_samples, n_features) array, not a scalar + +# Visualize the results +plt.scatter(X_test, y_test, color = 'red') +plt.plot(X_train, regressor.predict(X_train), color = 'blue') +plt.title('Salaire vs Experience') +plt.xlabel('Experience') +plt.ylabel('Salaire') +plt.show() \ No newline at end of file diff --git a/Partie 2 - Régression/regression_polynomiale.py b/Partie 2 - Régression/regression_polynomiale.py new file mode 100644 index 0000000..bf6d8b3 --- /dev/null +++ b/Partie 2 - Régression/regression_polynomiale.py @@ -0,0 +1,40 @@ +# Polynomial Regression + +# Import the libraries +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# Import the dataset +dataset = pd.read_csv('Position_Salaries.csv') +X = dataset.iloc[:, 1:2].values +y = dataset.iloc[:, -1].values + +# Build the model +from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import PolynomialFeatures +poly_reg = PolynomialFeatures(degree = 4) +X_poly = poly_reg.fit_transform(X) +regressor = LinearRegression() +regressor.fit(X_poly, y) + +# Make new predictions +regressor.predict(poly_reg.transform(np.array([[15]]))) # the model was fitted on X_poly, so new inputs need the same polynomial expansion + +# Visualize the results +plt.scatter(X, y, color = 'red') +plt.plot(X, regressor.predict(X_poly), color = 'blue') +plt.title('Salaire vs Experience') +plt.xlabel('Experience') +plt.ylabel('Salaire') +plt.show() + +# Visualize the results (smoother curve) +X_grid = np.arange(X.min(), X.max(), 0.1) +X_grid = X_grid.reshape((len(X_grid), 1)) +plt.scatter(X, y, color = 'red') +plt.plot(X_grid, regressor.predict(poly_reg.transform(X_grid)), color = 'blue') # transform only: poly_reg is already fitted +plt.title('Salaire vs Experience') +plt.xlabel('Experience') +plt.ylabel('Salaire') +plt.show() \ No newline at end of file diff --git a/Partie 3 - Classification/Social_Network_Ads.csv
b/Partie 3 - Classification/Social_Network_Ads.csv new file mode 100644 index 0000000..fef4357 --- /dev/null +++ b/Partie 3 - Classification/Social_Network_Ads.csv @@ -0,0 +1,401 @@ +User ID,Gender,Age,EstimatedSalary,Purchased +15624510,Male,19,19000,0 +15810944,Male,35,20000,0 +15668575,Female,26,43000,0 +15603246,Female,27,57000,0 +15804002,Male,19,76000,0 +15728773,Male,27,58000,0 +15598044,Female,27,84000,0 +15694829,Female,32,150000,1 +15600575,Male,25,33000,0 +15727311,Female,35,65000,0 +15570769,Female,26,80000,0 +15606274,Female,26,52000,0 +15746139,Male,20,86000,0 +15704987,Male,32,18000,0 +15628972,Male,18,82000,0 +15697686,Male,29,80000,0 +15733883,Male,47,25000,1 +15617482,Male,45,26000,1 +15704583,Male,46,28000,1 +15621083,Female,48,29000,1 +15649487,Male,45,22000,1 +15736760,Female,47,49000,1 +15714658,Male,48,41000,1 +15599081,Female,45,22000,1 +15705113,Male,46,23000,1 +15631159,Male,47,20000,1 +15792818,Male,49,28000,1 +15633531,Female,47,30000,1 +15744529,Male,29,43000,0 +15669656,Male,31,18000,0 +15581198,Male,31,74000,0 +15729054,Female,27,137000,1 +15573452,Female,21,16000,0 +15776733,Female,28,44000,0 +15724858,Male,27,90000,0 +15713144,Male,35,27000,0 +15690188,Female,33,28000,0 +15689425,Male,30,49000,0 +15671766,Female,26,72000,0 +15782806,Female,27,31000,0 +15764419,Female,27,17000,0 +15591915,Female,33,51000,0 +15772798,Male,35,108000,0 +15792008,Male,30,15000,0 +15715541,Female,28,84000,0 +15639277,Male,23,20000,0 +15798850,Male,25,79000,0 +15776348,Female,27,54000,0 +15727696,Male,30,135000,1 +15793813,Female,31,89000,0 +15694395,Female,24,32000,0 +15764195,Female,18,44000,0 +15744919,Female,29,83000,0 +15671655,Female,35,23000,0 +15654901,Female,27,58000,0 +15649136,Female,24,55000,0 +15775562,Female,23,48000,0 +15807481,Male,28,79000,0 +15642885,Male,22,18000,0 +15789109,Female,32,117000,0 +15814004,Male,27,20000,0 +15673619,Male,25,87000,0 +15595135,Female,23,66000,0 +15583681,Male,32,120000,1 +15605000,Female,59,83000,0 
+15718071,Male,24,58000,0 +15679760,Male,24,19000,0 +15654574,Female,23,82000,0 +15577178,Female,22,63000,0 +15595324,Female,31,68000,0 +15756932,Male,25,80000,0 +15726358,Female,24,27000,0 +15595228,Female,20,23000,0 +15782530,Female,33,113000,0 +15592877,Male,32,18000,0 +15651983,Male,34,112000,1 +15746737,Male,18,52000,0 +15774179,Female,22,27000,0 +15667265,Female,28,87000,0 +15655123,Female,26,17000,0 +15595917,Male,30,80000,0 +15668385,Male,39,42000,0 +15709476,Male,20,49000,0 +15711218,Male,35,88000,0 +15798659,Female,30,62000,0 +15663939,Female,31,118000,1 +15694946,Male,24,55000,0 +15631912,Female,28,85000,0 +15768816,Male,26,81000,0 +15682268,Male,35,50000,0 +15684801,Male,22,81000,0 +15636428,Female,30,116000,0 +15809823,Male,26,15000,0 +15699284,Female,29,28000,0 +15786993,Female,29,83000,0 +15709441,Female,35,44000,0 +15710257,Female,35,25000,0 +15582492,Male,28,123000,1 +15575694,Male,35,73000,0 +15756820,Female,28,37000,0 +15766289,Male,27,88000,0 +15593014,Male,28,59000,0 +15584545,Female,32,86000,0 +15675949,Female,33,149000,1 +15672091,Female,19,21000,0 +15801658,Male,21,72000,0 +15706185,Female,26,35000,0 +15789863,Male,27,89000,0 +15720943,Male,26,86000,0 +15697997,Female,38,80000,0 +15665416,Female,39,71000,0 +15660200,Female,37,71000,0 +15619653,Male,38,61000,0 +15773447,Male,37,55000,0 +15739160,Male,42,80000,0 +15689237,Male,40,57000,0 +15679297,Male,35,75000,0 +15591433,Male,36,52000,0 +15642725,Male,40,59000,0 +15701962,Male,41,59000,0 +15811613,Female,36,75000,0 +15741049,Male,37,72000,0 +15724423,Female,40,75000,0 +15574305,Male,35,53000,0 +15678168,Female,41,51000,0 +15697020,Female,39,61000,0 +15610801,Male,42,65000,0 +15745232,Male,26,32000,0 +15722758,Male,30,17000,0 +15792102,Female,26,84000,0 +15675185,Male,31,58000,0 +15801247,Male,33,31000,0 +15725660,Male,30,87000,0 +15638963,Female,21,68000,0 +15800061,Female,28,55000,0 +15578006,Male,23,63000,0 +15668504,Female,20,82000,0 +15687491,Male,30,107000,1 +15610403,Female,28,59000,0 
+15741094,Male,19,25000,0 +15807909,Male,19,85000,0 +15666141,Female,18,68000,0 +15617134,Male,35,59000,0 +15783029,Male,30,89000,0 +15622833,Female,34,25000,0 +15746422,Female,24,89000,0 +15750839,Female,27,96000,1 +15749130,Female,41,30000,0 +15779862,Male,29,61000,0 +15767871,Male,20,74000,0 +15679651,Female,26,15000,0 +15576219,Male,41,45000,0 +15699247,Male,31,76000,0 +15619087,Female,36,50000,0 +15605327,Male,40,47000,0 +15610140,Female,31,15000,0 +15791174,Male,46,59000,0 +15602373,Male,29,75000,0 +15762605,Male,26,30000,0 +15598840,Female,32,135000,1 +15744279,Male,32,100000,1 +15670619,Male,25,90000,0 +15599533,Female,37,33000,0 +15757837,Male,35,38000,0 +15697574,Female,33,69000,0 +15578738,Female,18,86000,0 +15762228,Female,22,55000,0 +15614827,Female,35,71000,0 +15789815,Male,29,148000,1 +15579781,Female,29,47000,0 +15587013,Male,21,88000,0 +15570932,Male,34,115000,0 +15794661,Female,26,118000,0 +15581654,Female,34,43000,0 +15644296,Female,34,72000,0 +15614420,Female,23,28000,0 +15609653,Female,35,47000,0 +15594577,Male,25,22000,0 +15584114,Male,24,23000,0 +15673367,Female,31,34000,0 +15685576,Male,26,16000,0 +15774727,Female,31,71000,0 +15694288,Female,32,117000,1 +15603319,Male,33,43000,0 +15759066,Female,33,60000,0 +15814816,Male,31,66000,0 +15724402,Female,20,82000,0 +15571059,Female,33,41000,0 +15674206,Male,35,72000,0 +15715160,Male,28,32000,0 +15730448,Male,24,84000,0 +15662067,Female,19,26000,0 +15779581,Male,29,43000,0 +15662901,Male,19,70000,0 +15689751,Male,28,89000,0 +15667742,Male,34,43000,0 +15738448,Female,30,79000,0 +15680243,Female,20,36000,0 +15745083,Male,26,80000,0 +15708228,Male,35,22000,0 +15628523,Male,35,39000,0 +15708196,Male,49,74000,0 +15735549,Female,39,134000,1 +15809347,Female,41,71000,0 +15660866,Female,58,101000,1 +15766609,Female,47,47000,0 +15654230,Female,55,130000,1 +15794566,Female,52,114000,0 +15800890,Female,40,142000,1 +15697424,Female,46,22000,0 +15724536,Female,48,96000,1 +15735878,Male,52,150000,1 
+15707596,Female,59,42000,0 +15657163,Male,35,58000.0,0 +15622478,Male,47.0,43000,0 +15779529,Female,60,108000,1 +15636023,Male,49,65000,0 +15582066,Male,40,78000,0 +15666675,Female,46,96000,0 +15732987,Male,59,143000,1 +15789432,Female,41,80000,0 +15663161,Male,35,91000,1 +15694879,Male,37,144000,1 +15593715,Male,60,102000,1 +15575002,Female,35,60000,0 +15622171,Male,37,53000,0 +15795224,Female,36,126000,1 +15685346,Male,56,133000,1 +15691808,Female,40,72000,0 +15721007,Female,42,80000,1 +15794253,Female,35,147000,1 +15694453,Male,39,42000,0 +15813113,Male,40,107000,1 +15614187,Male,49,86000,1 +15619407,Female,38,112000,0 +15646227,Male,46,79000,1 +15660541,Male,40,57000,0 +15753874,Female,37,80000,0 +15617877,Female,46,82000,0 +15772073,Female,53,143000,1 +15701537,Male,42,149000,1 +15736228,Male,38,59000,0 +15780572,Female,50,88000,1 +15769596,Female,56,104000,1 +15586996,Female,41,72000,0 +15722061,Female,51,146000,1 +15638003,Female,35,50000,0 +15775590,Female,57,122000,1 +15730688,Male,41,52000,0 +15753102,Female,35,97000,1 +15810075,Female,44,39000,0 +15723373,Male,37,52000,0 +15795298,Female,48,134000,1 +15584320,Female,37,146000,1 +15724161,Female,50,44000,0 +15750056,Female,52,90000,1 +15609637,Female,41,72000,0 +15794493,Male,40,57000,0 +15569641,Female,58,95000,1 +15815236,Female,45,131000,1 +15811177,Female,35,77000,0 +15680587,Male,36,144000,1 +15672821,Female,55,125000,1 +15767681,Female,35,72000,0 +15600379,Male,48,90000,1 +15801336,Female,42,108000,1 +15721592,Male,40,75000,0 +15581282,Male,37,74000,0 +15746203,Female,47,144000,1 +15583137,Male,40,61000,0 +15680752,Female,43,133000,0 +15688172,Female,59,76000,1 +15791373,Male,60,42000,1 +15589449,Male,39,106000,1 +15692819,Female,57,26000,1 +15727467,Male,57,74000,1 +15734312,Male,38,71000,0 +15764604,Male,49,88000,1 +15613014,Female,52,38000,1 +15759684,Female,50,36000,1 +15609669,Female,59,88000,1 +15685536,Male,35,61000,0 +15750447,Male,37,70000,1 +15663249,Female,52,21000,1 
+15638646,Male,48,141000,0 +15734161,Female,37,93000,1 +15631070,Female,37,62000,0 +15761950,Female,48,138000,1 +15649668,Male,41,79000,0 +15713912,Female,37,78000,1 +15586757,Male,39,134000,1 +15596522,Male,49,89000,1 +15625395,Male,55,39000,1 +15760570,Male,37,77000,0 +15566689,Female,35,57000,0 +15725794,Female,36,63000,0 +15673539,Male,42,73000,1 +15705298,Female,43,112000,1 +15675791,Male,45,79000,0 +15747043,Male,46,117000,1 +15736397,Female,58,38000,1 +15678201,Male,48,74000,1 +15720745,Female,37,137000,1 +15637593,Male,37,79000,1 +15598070,Female,40,60000,0 +15787550,Male,42,54000,0 +15603942,Female,51,134000,0 +15733973,Female,47,113000,1 +15596761,Male,36,125000,1 +15652400,Female,38,50000,0 +15717893,Female,42,70000,0 +15622585,Male,39,96000,1 +15733964,Female,38,50000,0 +15753861,Female,49,141000,1 +15747097,Female,39,79000,0 +15594762,Female,39,75000,1 +15667417,Female,54,104000,1 +15684861,Male,35,55000,0 +15742204,Male,45,32000,1 +15623502,Male,36,60000,0 +15774872,Female,52,138000,1 +15611191,Female,53,82000,1 +15674331,Male,41,52000,0 +15619465,Female,48,30000,1 +15575247,Female,48,131000,1 +15695679,Female,41,60000,0 +15713463,Male,41,72000,0 +15785170,Female,42,75000,0 +15796351,Male,36,118000,1 +15639576,Female,47,107000,1 +15693264,Male,38,51000,0 +15589715,Female,48,119000,1 +15769902,Male,42,65000,0 +15587177,Male,40,65000,0 +15814553,Male,57,60000,1 +15601550,Female,36,54000,0 +15664907,Male,58,144000,1 +15612465,Male,35,79000,0 +15810800,Female,38,55000,0 +15665760,Male,39,122000,1 +15588080,Female,53,104000,1 +15776844,Male,35,75000,0 +15717560,Female,38,65000,0 +15629739,Female,47,51000,1 +15729908,Male,47,105000,1 +15716781,Female,41,63000,0 +15646936,Male,53,72000,1 +15768151,Female,54,108000,1 +15579212,Male,39,77000,0 +15721835,Male,38,61000,0 +15800515,Female,38,113000,1 +15591279,Male,37,75000,0 +15587419,Female,42,90000,1 +15750335,Female,37,57000,0 +15699619,Male,36,99000,1 +15606472,Male,60,34000,1 +15778368,Male,54,70000,1 
+15671387,Female,41,72000,0 +15573926,Male,40,71000,1 +15709183,Male,42,54000,0 +15577514,Male,43,129000,1 +15778830,Female,53,34000,1 +15768072,Female,47,50000,1 +15768293,Female,42,79000,0 +15654456,Male,42,104000,1 +15807525,Female,59,29000,1 +15574372,Female,58,47000,1 +15671249,Male,46,88000,1 +15779744,Male,38,71000,0 +15624755,Female,54,26000,1 +15611430,Female,60,46000,1 +15774744,Male,60,83000,1 +15629885,Female,39,73000,0 +15708791,Male,59,130000,1 +15793890,Female,37,80000,0 +15646091,Female,46,32000,1 +15596984,Female,46,74000,0 +15800215,Female,42,53000,0 +15577806,Male,41,87000,1 +15749381,Female,58,23000,1 +15683758,Male,42,64000,0 +15670615,Male,48,33000,1 +15715622,Female,44,139000,1 +15707634,Male,49,28000,1 +15806901,Female,57,33000,1 +15775335,Male,56,60000,1 +15724150,Female,49,39000,1 +15627220,Male,39,71000,0 +15672330,Male,47,34000,1 +15668521,Female,48,35000,1 +15807837,Male,48,33000,1 +15592570,Male,47,23000,1 +15748589,Female,45,45000,1 +15635893,Male,60,42000,1 +15757632,Female,39,59000,0 +15691863,Female,46,41000,1 +15706071,Male,51,23000,1 +15654296,Female,50,20000,1 +15755018,Male,36,33000,0 +15594041,Female,49,36000,1 \ No newline at end of file diff --git a/Partie 3 - Classification/regression_logistique.py b/Partie 3 - Classification/regression_logistique.py new file mode 100644 index 0000000..b75b167 --- /dev/null +++ b/Partie 3 - Classification/regression_logistique.py @@ -0,0 +1,51 @@ +# Regression Logistique + +# Importer les librairies +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# Importer le dataset +dataset = pd.read_csv('Social_Network_Ads.csv') +X = dataset.iloc[:, [2, 3]].values +y = dataset.iloc[:, -1].values + +# Diviser le dataset entre le Training set et le Test set +from sklearn.model_selection import train_test_split +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) + +# Feature Scaling +from sklearn.preprocessing import StandardScaler +sc 
= StandardScaler() +X_train = sc.fit_transform(X_train) +X_test = sc.transform(X_test) + +# Build the model +from sklearn.linear_model import LogisticRegression +classifier = LogisticRegression(random_state = 0) +classifier.fit(X_train, y_train) + +# Make new predictions +y_pred = classifier.predict(X_test) + +# Confusion matrix +from sklearn.metrics import confusion_matrix +cm = confusion_matrix(y_test, y_pred) + +# Visualize the results +from matplotlib.colors import ListedColormap +X_set, y_set = X_train, y_train +X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), + np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) +plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), + alpha = 0.4, cmap = ListedColormap(('red', 'green'))) +plt.xlim(X1.min(), X1.max()) +plt.ylim(X2.min(), X2.max()) +for i, j in enumerate(np.unique(y_set)): + plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], + color = ListedColormap(('red', 'green'))(i), label = j) # color, not c: a single RGBA tuple passed as c is ambiguous with a sequence of values to colormap +plt.title('Résultats du Training set') +plt.xlabel('Age') +plt.ylabel('Salaire Estimé') +plt.legend() +plt.show() \ No newline at end of file diff --git a/Partie 3 - Classification/svm.py b/Partie 3 - Classification/svm.py new file mode 100644 index 0000000..9853381 --- /dev/null +++ b/Partie 3 - Classification/svm.py @@ -0,0 +1,51 @@ +# SVM + +# Import the libraries +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# Import the dataset +dataset = pd.read_csv('Social_Network_Ads.csv') +X = dataset.iloc[:, [2, 3]].values +y = dataset.iloc[:, -1].values + +# Split the dataset into the Training set and the Test set +from sklearn.model_selection import train_test_split +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) + +# Feature Scaling +from sklearn.preprocessing import StandardScaler +sc =
StandardScaler() +X_train = sc.fit_transform(X_train) +X_test = sc.transform(X_test) + +# Build the model +from sklearn.svm import SVC +classifier = SVC(kernel = 'linear', random_state = 0) +classifier.fit(X_train, y_train) + +# Make new predictions +y_pred = classifier.predict(X_test) + +# Confusion matrix +from sklearn.metrics import confusion_matrix +cm = confusion_matrix(y_test, y_pred) + +# Visualize the results +from matplotlib.colors import ListedColormap +X_set, y_set = X_train, y_train +X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), + np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) +plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), + alpha = 0.4, cmap = ListedColormap(('red', 'green'))) +plt.xlim(X1.min(), X1.max()) +plt.ylim(X2.min(), X2.max()) +for i, j in enumerate(np.unique(y_set)): + plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], + color = ListedColormap(('red', 'green'))(i), label = j) # color, not c: a single RGBA tuple passed as c is ambiguous with a sequence of values to colormap +plt.title('Résultats du Training set') +plt.xlabel('Age') +plt.ylabel('Salaire Estimé') +plt.legend() +plt.show() \ No newline at end of file diff --git a/Partie 4 - Clustering/Mall_Customers.csv b/Partie 4 - Clustering/Mall_Customers.csv new file mode 100644 index 0000000..9d16c77 --- /dev/null +++ b/Partie 4 - Clustering/Mall_Customers.csv @@ -0,0 +1,201 @@ +CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100) +0001,Male,19,15,39 +0002,Male,21,15,81 +0003,Female,20,16,6 +0004,Female,23,16,77 +0005,Female,31,17,40 +0006,Female,22,17,76 +0007,Female,35,18,6 +0008,Female,23,18,94 +0009,Male,64,19,3 +0010,Female,30,19,72 +0011,Male,67,19,14 +0012,Female,35,19,99 +0013,Female,58,20,15 +0014,Female,24,20,77 +0015,Male,37,20,13 +0016,Male,22,20,79 +0017,Female,35,21,35 +0018,Male,20,21,66 +0019,Male,52,23,29 +0020,Female,35,23,98 +0021,Male,35,24,35 +0022,Male,25,24,73 +0023,Female,46,25,5
+0024,Male,31,25,73 +0025,Female,54,28,14 +0026,Male,29,28,82 +0027,Female,45,28,32 +0028,Male,35,28,61 +0029,Female,40,29,31 +0030,Female,23,29,87 +0031,Male,60,30,4 +0032,Female,21,30,73 +0033,Male,53,33,4 +0034,Male,18,33,92 +0035,Female,49,33,14 +0036,Female,21,33,81 +0037,Female,42,34,17 +0038,Female,30,34,73 +0039,Female,36,37,26 +0040,Female,20,37,75 +0041,Female,65,38,35 +0042,Male,24,38,92 +0043,Male,48,39,36 +0044,Female,31,39,61 +0045,Female,49,39,28 +0046,Female,24,39,65 +0047,Female,50,40,55 +0048,Female,27,40,47 +0049,Female,29,40,42 +0050,Female,31,40,42 +0051,Female,49,42,52 +0052,Male,33,42,60 +0053,Female,31,43,54 +0054,Male,59,43,60 +0055,Female,50,43,45 +0056,Male,47,43,41 +0057,Female,51,44,50 +0058,Male,69,44,46 +0059,Female,27,46,51 +0060,Male,53,46,46 +0061,Male,70,46,56 +0062,Male,19,46,55 +0063,Female,67,47,52 +0064,Female,54,47,59 +0065,Male,63,48,51 +0066,Male,18,48,59 +0067,Female,43,48,50 +0068,Female,68,48,48 +0069,Male,19,48,59 +0070,Female,32,48,47 +0071,Male,70,49,55 +0072,Female,47,49,42 +0073,Female,60,50,49 +0074,Female,60,50,56 +0075,Male,59,54,47 +0076,Male,26,54,54 +0077,Female,45,54,53 +0078,Male,40,54,48 +0079,Female,23,54,52 +0080,Female,49,54,42 +0081,Male,57,54,51 +0082,Male,38,54,55 +0083,Male,67,54,41 +0084,Female,46,54,44 +0085,Female,21,54,57 +0086,Male,48,54,46 +0087,Female,55,57,58 +0088,Female,22,57,55 +0089,Female,34,58,60 +0090,Female,50,58,46 +0091,Female,68,59,55 +0092,Male,18,59,41 +0093,Male,48,60,49 +0094,Female,40,60,40 +0095,Female,32,60,42 +0096,Male,24,60,52 +0097,Female,47,60,47 +0098,Female,27,60,50 +0099,Male,48,61,42 +0100,Male,20,61,49 +0101,Female,23,62,41 +0102,Female,49,62,48 +0103,Male,67,62,59 +0104,Male,26,62,55 +0105,Male,49,62,56 +0106,Female,21,62,42 +0107,Female,66,63,50 +0108,Male,54,63,46 +0109,Male,68,63,43 +0110,Male,66,63,48 +0111,Male,65,63,52 +0112,Female,19,63,54 +0113,Female,38,64,42 +0114,Male,19,64,46 +0115,Female,18,65,48 +0116,Female,19,65,50 +0117,Female,63,65,43 
+0118,Female,49,65,59 +0119,Female,51,67,43 +0120,Female,50,67,57 +0121,Male,27,67,56 +0122,Female,38,67,40 +0123,Female,40,69,58 +0124,Male,39,69,91 +0125,Female,23,70,29 +0126,Female,31,70,77 +0127,Male,43,71,35 +0128,Male,40,71,95 +0129,Male,59,71,11 +0130,Male,38,71,75 +0131,Male,47,71,9 +0132,Male,39,71,75 +0133,Female,25,72,34 +0134,Female,31,72,71 +0135,Male,20,73,5 +0136,Female,29,73,88 +0137,Female,44,73,7 +0138,Male,32,73,73 +0139,Male,19,74,10 +0140,Female,35,74,72 +0141,Female,57,75,5 +0142,Male,32,75,93 +0143,Female,28,76,40 +0144,Female,32,76,87 +0145,Male,25,77,12 +0146,Male,28,77,97 +0147,Male,48,77,36 +0148,Female,32,77,74 +0149,Female,34,78,22 +0150,Male,34,78,90 +0151,Male,43,78,17 +0152,Male,39,78,88 +0153,Female,44,78,20 +0154,Female,38,78,76 +0155,Female,47,78,16 +0156,Female,27,78,89 +0157,Male,37,78,1 +0158,Female,30,78,78 +0159,Male,34,78,1 +0160,Female,30,78,73 +0161,Female,56,79,35 +0162,Female,29,79,83 +0163,Male,19,81,5 +0164,Female,31,81,93 +0165,Male,50,85,26 +0166,Female,36,85,75 +0167,Male,42,86,20 +0168,Female,33,86,95 +0169,Female,36,87,27 +0170,Male,32,87,63 +0171,Male,40,87,13 +0172,Male,28,87,75 +0173,Male,36,87,10 +0174,Male,36,87,92 +0175,Female,52,88,13 +0176,Female,30,88,86 +0177,Male,58,88,15 +0178,Male,27,88,69 +0179,Male,59,93,14 +0180,Male,35,93,90 +0181,Female,37,97,32 +0182,Female,32,97,86 +0183,Male,46,98,15 +0184,Female,29,98,88 +0185,Female,41,99,39 +0186,Male,30,99,97 +0187,Female,54,101,24 +0188,Male,28,101,68 +0189,Female,41,103,17 +0190,Female,36,103,85 +0191,Female,34,103,23 +0192,Female,32,103,69 +0193,Male,33,113,8 +0194,Female,38,113,91 +0195,Female,47,120,16 +0196,Female,35,120,79 +0197,Female,45,126,28 +0198,Male,32,126,74 +0199,Male,32,137,18 +0200,Male,30,137,83 \ No newline at end of file diff --git a/Partie 4 - Clustering/k_means.py b/Partie 4 - Clustering/k_means.py new file mode 100644 index 0000000..58f66ed --- /dev/null +++ b/Partie 4 - Clustering/k_means.py @@ -0,0 +1,39 @@ +# K-Means + +# 
Import the libraries +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# Import the dataset +dataset = pd.read_csv('Mall_Customers.csv') +X = dataset.iloc[:, [3, 4]].values + +# Use the elbow method to find the optimal number of clusters +from sklearn.cluster import KMeans +wcss = [] +for i in range(1, 11): + kmeans = KMeans(n_clusters = i, init = 'k-means++', n_init = 10, random_state = 0) + kmeans.fit(X) + wcss.append(kmeans.inertia_) +plt.plot(range(1, 11), wcss) +plt.title('La méthode Elbow') +plt.xlabel('Nombre de clusters') +plt.ylabel('WCSS') +plt.show() + +# Build the model (n_init = 10 was the default before scikit-learn 1.4; pinning it keeps results independent of the installed version) +kmeans = KMeans(n_clusters = 5, init = 'k-means++', n_init = 10, random_state = 0) +y_kmeans = kmeans.fit_predict(X) + +# Visualize the results +plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], c = 'red', label = 'Cluster 1') +plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], c = 'blue', label = 'Cluster 2') +plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], c = 'green', label = 'Cluster 3') +plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], c = 'cyan', label = 'Cluster 4') +plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], c = 'magenta', label = 'Cluster 5') +plt.title('Clusters de clients') +plt.xlabel('Salaire annuel') +plt.ylabel('Spending Score') +plt.legend() +plt.show() \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1ed0af9 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# Introduction to Machine Learning +The codes are from the following course: +- [Introduction au Machine Learning | Udemy](https://www.udemy.com/course/introduction-au-machine-learning/)