513 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
		
		
			
		
	
	
			513 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| 
								 | 
							
								# Predict the salary for Level 6.5.
								# `regressor` is not created before this point in the visible history, so it
								# must already exist in the session.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Decision Tree Regression ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses them as Level and Salary.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# tree is fitted on the full, unscaled dataset.

								# Fit a regression tree of Salary on every other column.
								# minsplit = 1 lowers the node size rpart requires before trying a split.
								# install.packages("rpart")
								library(rpart)
								regressor <- rpart(
								  formula = Salary ~ .,
								  data = dataset,
								  control = rpart.control(minsplit = 1)
								)

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Decision Tree Regression ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses them as Level and Salary.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# tree is fitted on the full, unscaled dataset.

								# Fit a regression tree of Salary on every other column.
								# minsplit = 1 lowers the node size rpart requires before trying a split.
								# install.packages("rpart")
								library(rpart)
								regressor <- rpart(
								  formula = Salary ~ .,
								  data = dataset,
								  control = rpart.control(minsplit = 1)
								)

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Decision Tree Regression ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses them as Level and Salary.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# tree is fitted on the full, unscaled dataset.

								# Fit a regression tree of Salary on every other column.
								# minsplit = 1 lowers the node size rpart requires before trying a split.
								# install.packages("rpart")
								library(rpart)
								regressor <- rpart(
								  formula = Salary ~ .,
								  data = dataset,
								  control = rpart.control(minsplit = 1)
								)

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Decision Tree Regression ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses them as Level and Salary.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# tree is fitted on the full, unscaled dataset.

								# Fit a regression tree of Salary on every other column.
								# minsplit = 1 lowers the node size rpart requires before trying a split.
								# install.packages("rpart")
								library(rpart)
								regressor <- rpart(
								  formula = Salary ~ .,
								  data = dataset,
								  control = rpart.control(minsplit = 1)
								)

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Decision Tree Regression ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses them as Level and Salary.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# tree is fitted on the full, unscaled dataset.

								# Fit a regression tree of Salary on every other column.
								# minsplit = 1 lowers the node size rpart requires before trying a split.
								# install.packages("rpart")
								library(rpart)
								regressor <- rpart(
								  formula = Salary ~ .,
								  data = dataset,
								  control = rpart.control(minsplit = 1)
								)

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Decision Tree Regression (default rpart control) ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses them as Level and Salary.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# tree is fitted on the full, unscaled dataset.

								# Fit a regression tree of Salary on every other column. No `control`
								# argument is passed in this run, so rpart's default settings apply.
								# install.packages("rpart")
								library(rpart)
								regressor <- rpart(
								  formula = Salary ~ .,
								  data = dataset
								)

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Visualising the Decision Tree Regression results ----
								# Red points: observed Salary per Level. Blue line: the fitted model's
								# predictions at the observed levels.
								# The data frame is handed to ggplot() and columns are named bare inside
								# aes(), instead of reaching into `dataset$...` from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								ggplot(dataset, aes(x = Level)) +
								  geom_point(aes(y = Salary), colour = "red") +
								  geom_line(aes(y = predict(regressor, newdata = dataset)), colour = "blue") +
								  ggtitle("Truth or Bluff (Decision Tree Regression)") +
								  xlab("Level") +
								  ylab("Salary")
							 | 
						||
| 
								 | 
							
								# Visualising the Decision Tree Regression results ----
								# Red points: observed Salary per Level. Blue line: the fitted model's
								# predictions at the observed levels.
								# The data frame is handed to ggplot() and columns are named bare inside
								# aes(), instead of reaching into `dataset$...` from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								ggplot(dataset, aes(x = Level)) +
								  geom_point(aes(y = Salary), colour = "red") +
								  geom_line(aes(y = predict(regressor, newdata = dataset)), colour = "blue") +
								  ggtitle("Truth or Bluff (Decision Tree Regression)") +
								  xlab("Level") +
								  ylab("Salary")
							 | 
						||
| 
								 | 
							
								# Fit a regression tree of Salary on every other column of `dataset`.
								# minsplit = 1 lowers the node size rpart requires before trying a split.
								# Requires `dataset` to exist already.
								# install.packages("rpart")
								library(rpart)
								regressor <- rpart(
								  formula = Salary ~ .,
								  data = dataset,
								  control = rpart.control(minsplit = 1)
								)
							 | 
						||
| 
								 | 
							
								# Visualising the Decision Tree Regression results ----
								# Red points: observed Salary per Level. Blue line: the fitted model's
								# predictions at the observed levels.
								# The data frame is handed to ggplot() and columns are named bare inside
								# aes(), instead of reaching into `dataset$...` from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								ggplot(dataset, aes(x = Level)) +
								  geom_point(aes(y = Salary), colour = "red") +
								  geom_line(aes(y = predict(regressor, newdata = dataset)), colour = "blue") +
								  ggtitle("Truth or Bluff (Decision Tree Regression)") +
								  xlab("Level") +
								  ylab("Salary")
							 | 
						||
| 
								 | 
							
								# Visualising the Decision Tree Regression results on a finer grid ----
								# Predictions are evaluated every 0.1 between the smallest and largest
								# observed Level, so the fitted curve is drawn at a higher resolution than
								# the observed points.
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.1)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Decision Tree Regression)") +
								  xlab("Level") +
								  ylab("Salary")
							 | 
						||
| 
								 | 
							
								# Visualising the Decision Tree Regression results on a finer grid ----
								# Predictions are evaluated every 0.01 between the smallest and largest
								# observed Level, so the fitted curve is drawn at a higher resolution than
								# the observed points.
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Decision Tree Regression)") +
								  xlab("Level") +
								  ylab("Salary")

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Random forest session setup ----
								# NOTE(review): setwd() ties this to one machine's directory layout. It is
								# kept because the relative read.csv() paths in this file depend on it.
								setwd("~/Desktop/Machine Learning A-Z/Part 2 - Regression/Section 9 - Random Forest Regression")

								# Load the position/salary data and keep only columns 2 and 3.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[2:3]

								# View() opens a data viewer, so it is only called in an interactive
								# session; a non-interactive run skips it instead of depending on a GUI.
								if (interactive()) {
								  View(dataset)
								}

								# Fit a random forest of 10 trees with column 1 of `dataset` as the
								# predictor and Salary as the response. The seed is fixed first so the
								# fit is repeatable.
								library(randomForest)
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 10
								)
							 | 
						||
| 
								 | 
							
								# Visualising the Random Forest Regression results on a finer grid ----
								# Predictions are evaluated every 0.1 between the smallest and largest
								# observed Level, so the fitted curve is drawn at a higher resolution than
								# the observed points.
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.1)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Random Forest Regression)") +
								  xlab("Level") +
								  ylab("Salary")
							 | 
						||
| 
								 | 
							
								# Visualising the Random Forest Regression results on a finer grid ----
								# Predictions are evaluated every 0.01 between the smallest and largest
								# observed Level, so the fitted curve is drawn at a higher resolution than
								# the observed points.
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Random Forest Regression)") +
								  xlab("Level") +
								  ylab("Salary")
							 | 
						||
| 
								 | 
							
								# Refit the random forest with 100 trees, plot it, and predict ----
								# Requires `dataset` to exist and randomForest to be attached already.
								# The seed is fixed first so the fit is repeatable.
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 100
								)

								# Plot observed salaries (red) and the forest's predictions every 0.01
								# between the smallest and largest observed Level (blue).
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Random Forest Regression)") +
								  xlab("Level") +
								  ylab("Salary")

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Refit the random forest with 10 trees, using column 1 of `dataset` as
								# the predictor and Salary as the response. The seed is fixed first.
								# Requires `dataset` to exist and randomForest to be attached already.
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 10
								)
							 | 
						||
| 
								 | 
							
								# Random Forest Regression ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses the Salary column as the response.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# forest is fitted on the full, unscaled dataset.

								# Fit a random forest of 10 trees with column 1 of `dataset` as the
								# predictor. The seed is fixed first.
								# install.packages("randomForest")
								library(randomForest)
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 10
								)
							 | 
						||
| 
								 | 
							
								# Visualising the Random Forest Regression results on a finer grid ----
								# Predictions are evaluated every 0.01 between the smallest and largest
								# observed Level, so the fitted curve is drawn at a higher resolution than
								# the observed points.
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Random Forest Regression)") +
								  xlab("Level") +
								  ylab("Salary")

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Refit the random forest with 100 trees, plot it, and predict ----
								# Requires `dataset` to exist and randomForest to be attached already.
								# Every other randomForest fit in this file calls set.seed(1234) first.
								# This one did not, so its result changed from run to run; the seed is
								# added here to make it repeatable and consistent with the others.
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 100
								)

								# Plot observed salaries (red) and the forest's predictions every 0.01
								# between the smallest and largest observed Level (blue).
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Random Forest Regression)") +
								  xlab("Level") +
								  ylab("Salary")

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Refit the random forest with 500 trees, plot it, and predict ----
								# Requires `dataset` to exist and randomForest to be attached already.
								# The seed is fixed first so the fit is repeatable.
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 500
								)

								# Plot observed salaries (red) and the forest's predictions every 0.01
								# between the smallest and largest observed Level (blue).
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Random Forest Regression)") +
								  xlab("Level") +
								  ylab("Salary")

								# Predict the salary for Level 6.5.
								y_pred <- predict(regressor, data.frame(Level = 6.5))
							 | 
						||
| 
								 | 
							
								# Reload the position/salary data and keep only columns 2 and 3.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Fit a random forest of 500 trees with column 1 of `dataset` as the
								# predictor and Salary as the response. The seed is fixed first.
								library(randomForest)
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 500
								)
							 | 
						||
| 
								 | 
							
								# Fit a random forest of 10 trees with column 1 of `dataset` as the
								# predictor and Salary as the response. The seed is fixed first.
								# Requires `dataset` to exist already.
								# install.packages("randomForest")
								library(randomForest)
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 10
								)
							 | 
						||
| 
								 | 
							
								# Random Forest Regression ----
								# Load the position/salary data and keep only columns 2 and 3; the code
								# below uses the Salary column as the response.
								dataset <- read.csv("Position_Salaries.csv")
								dataset <- dataset[, 2:3]

								# Train/test splitting (caTools::sample.split, SplitRatio = 2/3, seed 123)
								# and feature scaling with scale() are commented out in this run, so the
								# forest is fitted on the full, unscaled dataset.

								# Fit a random forest of 10 trees with column 1 of `dataset` as the
								# predictor. The seed is fixed first.
								# install.packages("randomForest")
								library(randomForest)
								set.seed(1234)
								regressor <- randomForest(
								  x = dataset[1],
								  y = dataset$Salary,
								  ntree = 10
								)
							 | 
						||
| 
								 | 
							
								# Visualising the Random Forest Regression results on a finer grid ----
								# Predictions are evaluated every 0.01 between the smallest and largest
								# observed Level, so the fitted curve is drawn at a higher resolution than
								# the observed points.
								# Both layers take their columns from data frames, instead of reaching
								# into `dataset$...` and loose vectors from within aes().
								# Requires `dataset` and `regressor` to exist already.
								# install.packages("ggplot2")
								library(ggplot2)
								x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)
								grid_fit <- data.frame(Level = x_grid)
								grid_fit$Salary <- predict(regressor, newdata = grid_fit)
								ggplot(dataset, aes(x = Level, y = Salary)) +
								  geom_point(colour = "red") +
								  geom_line(data = grid_fit, colour = "blue") +
								  ggtitle("Truth or Bluff (Random Forest Regression)") +
								  xlab("Level") +
								  ylab("Salary")
							 | 
						||
| 
								 | 
							
								# Data preprocessing: load Data.csv and fill missing values ----
								# NOTE(review): setwd() ties this to one machine's directory layout. It is
								# kept because the relative read.csv() paths in this file depend on it.
								setwd("~/Desktop/Machine Learning A-Z/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------")
								dataset <- read.csv("Data.csv")

								# View() opens a data viewer, so it is only called in an interactive
								# session; a non-interactive run skips it instead of depending on a GUI.
								if (interactive()) {
								  View(dataset)
								}

								# Replace each missing Age with the mean of the non-missing Age values in
								# the same Country; rows that already have a value keep it.
								dataset$Age <- ifelse(
								  is.na(dataset$Age),
								  ave(dataset$Age, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
								  dataset$Age
								)

								# Same per-Country mean imputation for Salary.
								dataset$Salary <- ifelse(
								  is.na(dataset$Salary),
								  ave(dataset$Salary, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
								  dataset$Salary
								)
							 | 
						||
| 
								 | 
							
								dataset = read.csv('Data.csv')
							 | 
						||
| 
								 | 
							
								# Load the drug data set from the project folder and open it in the viewer.
								setwd("~/Downloads/Drug Final/Drug Final")
								# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
								drugdata <- read.csv("Drug_data.csv", header = TRUE)
								View(drugdata)
							 | 
						||
| 
								 | 
							
								# Splitting the dataset into the Training set and Test set, then fitting a tree.
								# install.packages('caTools')
								library(caTools)
								library(rpart)
								library(partykit)
								# Fixed seed so the train/test partition is reproducible.
								set.seed(123)
								# Logical mask over the rows of drugdata, built from the DFREE column with
								# SplitRatio = 0.75; TRUE rows go to training, FALSE rows to test.
								split <- sample.split(drugdata$DFREE, SplitRatio = 0.75)
								training_set <- subset(drugdata, split == TRUE)
								test_set <- subset(drugdata, split == FALSE)
								# Fit the tree only after training_set exists (the model was previously also
								# fitted before the split, against whatever training_set was left in the session).
								# DFREE is modelled on all remaining columns.
								drug_model <- rpart(formula = DFREE ~ ., data = training_set)
								# Draw the tree skeleton, then add the split labels.
								plot(drug_model)
								text(drug_model, pretty = 0)
							 | 
						||
| 
								 | 
							
								# ROC curve, AUC and confusion matrix for the predictions stored on test_set.
								library(ROCR)
								# NOTE(review): test_set$prob is not assigned anywhere in the visible code; it is
								# indexed with [, 1] below, so it presumably holds a matrix of predicted
								# probabilities added elsewhere - confirm before running this section.
								ROCRpred <- prediction(test_set$prob[, 1], test_set$DFREE)
								# True-positive rate against false-positive rate.
								ROCRperf <- performance(ROCRpred, 'tpr', 'fpr')
								plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
								# Grey reference line with intercept 0 and slope 1.
								abline(0, 1, lty = 8, col = 'grey')
								AUCperf <- performance(ROCRpred, 'auc')
								# Printed at top level: the stored AUC value(s).
								attr(AUCperf, 'y.values')
								# Deciding the optimal cut off point.
								# Values above 0.27 are labelled 1, everything else 0.
								test_set$pred_et <- ifelse(test_set$prob > 0.27, 1, 0)
								# Keep only the first column of the thresholded result, as a factor.
								test_set$pred_et <- as.factor(test_set$pred_et[, 1])
								# Confusion Matrix
								# Rows are the DFREE values, columns are the thresholded predictions.
								cm <- table(test_set$DFREE, test_set$pred_et)
								cm
								# Share of observations on the diagonal of the confusion matrix.
								Efficiency <- sum(diag(cm)) / sum(cm)
								Efficiency
							 | 
						||
| 
								 | 
							
								# Reload the drug data from Drug_data.csv and open it in the viewer.
								# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
								drugdata <- read.csv("Drug_data.csv", header = TRUE)
								View(drugdata)
							 | 
						||
| 
								 | 
							
								# PCA
								# Importing the dataset
								dataset <- read.csv('Wine.csv')
								# Splitting the dataset into the Training set and Test set
								# install.packages('caTools')
								library(caTools)
								# Fixed seed so the partition is reproducible.
								set.seed(123)
								# Logical row mask built from Customer_Segment with SplitRatio = 0.8.
								split <- sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
								# TRUE rows form the training set, the remaining rows the test set.
								training_set <- dataset[split, , drop = FALSE]
								test_set <- dataset[!split, , drop = FALSE]
							 | 
						||
| 
								 | 
							
								# Feature Scaling
								# Standardise every column except column 14 (presumably the Customer_Segment
								# label - confirm). The centring and scaling values are estimated on the
								# training set only and then reused for the test set, so both sets are
								# expressed on the same scale and no test-set statistics are used.
								scaled_training <- scale(training_set[-14])
								test_set[-14] <- scale(test_set[-14],
								                       center = attr(scaled_training, "scaled:center"),
								                       scale = attr(scaled_training, "scaled:scale"))
								training_set[-14] <- scaled_training
							 | 
						||
| 
								 | 
							
								# Applying PCA
								# install.packages('caret')
								library(caret)
								# install.packages('e1071')
								library(e1071)
								# Learn a 2-component PCA transform from the training columns other than column 14.
								pca <- preProcess(x = training_set[-14], method = 'pca', pcaComp = 2)
								# Apply the transform to each set and reorder the resulting columns so that
								# columns 2 and 3 come first and column 1 last (presumably the two components
								# followed by the untouched label column - confirm).
								training_set <- predict(pca, training_set)[c(2, 3, 1)]
								test_set <- predict(pca, test_set)[c(2, 3, 1)]
							 | 
						||
| 
								 | 
							
								# Scratch assignment (entered twice in the original history; once is enough).
								a <- 2
							 | 
						||
| 
								 | 
							
								# Import the ratings file, whose fields are separated by "::".
								# Switch to the data folder first so that 'ratings.dat' is resolved there.
								setwd("~/Documents/Udemy/Teaching/Deep Learning A-Z/Datasets/ml-1m")
								# Replace every "::" with a space and parse the result as a
								# whitespace-separated table. Passing the lines through `text =` lets
								# read.table() open and close its own text connection, instead of leaving
								# an unclosed connection behind once its handle has been overwritten.
								dat <- read.table(text = gsub("::", " ", readLines('ratings.dat'), fixed = TRUE))
								View(dat)
							 | 
						||
| 
								 | 
							
								# Split the ratings table into training and test sets and export them.
								library(caTools)
								# Row mask built from the first column of `dat` with SplitRatio = 0.75.
								# (An earlier attempt used dataset$V1, i.e. the wrong data frame.)
								split <- sample.split(dat$V1, SplitRatio = 0.75)
								training_set <- subset(dat, split == TRUE)
								test_set <- subset(dat, split == FALSE)
								View(training_set)
								View(test_set)
								# Load from the default library paths rather than a path pinned to one
								# machine's R 3.3 installation.
								library("foreign")
								# Tab-separated exports of both sets.
								write.table(training_set, "training_set.dat", sep = "\t")
								write.table(test_set, "test_set.dat", sep = "\t")
								View(training_set)
								# NOTE(review): `training_set - 2` subtracts 2 from every cell of the data
								# frame before writing; confirm this is intended rather than e.g. dropping
								# a column.
								write.table(training_set - 2, "training_set.txt", sep = "\t")
							 | 
						||
| 
								 | 
							
								# Import ratings_bis.csv (no header row), trying comma- and tab-separated parsing.
								dataset <- read.csv('ratings_bis.csv', header = FALSE)
								dataset <- read.delim('ratings_bis.csv', header = FALSE, sep = '\t')
								View(dataset)
								# The comma-separated parse is the one kept for the steps below.
								dataset <- read.csv('ratings_bis.csv', header = FALSE)
								View(dataset)
								library(caTools)
								# Row mask built from the first column with SplitRatio = 0.75.
								split <- sample.split(dataset$V1, SplitRatio = 0.75)
								training_set <- dataset[split, , drop = FALSE]
								test_set <- dataset[!split, , drop = FALSE]
								# write.table() is called without `sep`, so despite the .csv extension
								# these files are written with write.table's default separator.
								write.table(training_set, "training_set.csv")
								write.table(test_set, "test_set.csv")
							 | 
						||
| 
								 | 
							
								# Import ratings_bis.csv, name its columns, split it and export both parts.
								dataset <- read.csv('ratings_bis.csv', header = FALSE)
								View(dataset)
								colnames(dataset) <- c("User","Movie","Rating","Timestamp")
								library(caTools)
								# Row mask built from the User column with SplitRatio = 0.75.
								split <- sample.split(dataset$User, SplitRatio = 0.75)
								training_set <- subset(dataset, split == TRUE)
								test_set <- subset(dataset, split == FALSE)
								# Each file is written once; the repeated write.csv() pair and the
								# self-assignment `training_set = training_set` had no effect and were dropped.
								write.csv(training_set, "training_set.csv")
								write.csv(test_set, "test_set.csv")
								View(training_set)
							 | 
						||
| 
								 | 
							
								# Reload the exported sets, keep columns 2 to 5, inspect them and write them back.
								# The original history re-read and re-sliced the same files several times
								# (including an exploratory [1:5] slice); only the final state is reproduced.
								training_set <- read.csv('training_set.csv')
								test_set <- read.csv('test_set.csv')
								# Drop the first column of each set.
								training_set <- training_set[2:5]
								test_set <- test_set[2:5]
								View(training_set)
								View(test_set)
								# NOTE(review): write.csv() writes row names by default, so the files gain a
								# leading index column again; pass row.names = FALSE if that column is not
								# wanted (confirm nothing downstream relies on it).
								write.csv(training_set, "training_set.csv")
								write.csv(test_set, "test_set.csv")
							 | 
						||
| 
								 | 
							
								# Import ratings.csv (no header row), name its columns, split and export.
								# The read/rename pair appeared twice in a row; one pass gives the same result.
								dataset <- read.csv('ratings.csv', header = FALSE)
								colnames(dataset) <- c("User","Movie","Rating","Timestamp")
								View(dataset)
								library(caTools)
								# Row mask built from the User column with SplitRatio = 0.75.
								split <- sample.split(dataset$User, SplitRatio = 0.75)
								training_set <- subset(dataset, split == TRUE)
								test_set <- subset(dataset, split == FALSE)
								write.csv(training_set, "training_set.csv")
								write.csv(test_set, "test_set.csv")
							 |