# Decision Tree Regression

# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
                  data = dataset,
                  control = rpart.control(minsplit = 1))

# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
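# Added note (a minimal sketch, assuming only base rpart): minsplit = 1 lets the
# tree split down to single observations, which is what allows it to fit each
# salary level in this 10-row dataset; with the default minsplit of 20 the tree
# would stay a single root node and predict one constant value.
print(regressor)                  # text view of the fitted splits
plot(regressor); text(regressor)  # quick base-graphics view of the tree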
# Visualising the Decision Tree Regression results
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Decision Tree Regression)') +
  xlab('Level') +
  ylab('Salary')

# Visualising the Decision Tree Regression results (for higher resolution and smoother curve)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Decision Tree Regression)') +
  xlab('Level') +
  ylab('Salary')
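# Added note (illustrative only): a regression tree predicts one constant value
# per leaf, so the fine x_grid above reveals the true step shape, whereas plotting
# only at the 10 observed Levels would join the steps with straight lines.
# The per-level predictions can be listed next to the actual salaries:
data.frame(Level = dataset$Level,
           Salary = dataset$Salary,
           Tree_prediction = predict(regressor, newdata = dataset))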
# Random Forest Regression
setwd("~/Desktop/Machine Learning A-Z/Part 2 - Regression/Section 9 - Random Forest Regression")

# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Random Forest Regression to the dataset
# install.packages('randomForest')
library(randomForest)
set.seed(1234)
regressor = randomForest(x = dataset[1],
                         y = dataset$Salary,
                         ntree = 500)  # earlier runs also used ntree = 10 and ntree = 100

# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))

# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Random Forest Regression)') +
  xlab('Level') +
  ylab('Salary')
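# Added sketch (assumes randomForest and the dataset loaded as above): increasing
# ntree does not grow deeper trees, it averages more of them, which is why the
# curve gains intermediate steps; the effect can be compared at Level = 6.5.
for (n in c(10, 100, 500)) {
  set.seed(1234)
  rf = randomForest(x = dataset[1], y = dataset$Salary, ntree = n)
  cat('ntree =', n, '-> prediction at 6.5:',
      predict(rf, data.frame(Level = 6.5)), '\n')
}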
# Data Preprocessing
setwd("~/Desktop/Machine Learning A-Z/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------")

# Importing the dataset
dataset = read.csv('Data.csv')
View(dataset)

# Taking care of missing data (replace NAs by the country-group mean)
dataset$Age = ifelse(is.na(dataset$Age),
                     ave(dataset$Age, dataset$Country,
                         FUN = function(x) mean(x, na.rm = TRUE)),
                     dataset$Age)
dataset$Salary = ifelse(is.na(dataset$Salary),
                        ave(dataset$Salary, dataset$Country,
                            FUN = function(x) mean(x, na.rm = TRUE)),
                        dataset$Salary)

# Decision tree classification on the drug dataset
setwd("~/Downloads/Drug Final/Drug Final")
drugdata <- read.csv("Drug_data.csv", header = T)
View(drugdata)

# Added assumption: DFREE is used as a class label below, so it is coerced to a
# factor here so that rpart grows a classification tree and predict() returns
# class probabilities (the original history never shows this step).
drugdata$DFREE <- as.factor(drugdata$DFREE)

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split <- sample.split(drugdata$DFREE, SplitRatio = 0.75)
training_set <- subset(drugdata, split == TRUE)
test_set <- subset(drugdata, split == FALSE)

# Fitting and plotting the classification tree
library(rpart)
library(partykit)
drug_model = rpart(formula = DFREE ~ ., data = training_set)
plot(drug_model)
text(drug_model, pretty = 0)

# ROC curve and AUC
library(ROCR)
# Added to make the ROC code runnable: the history uses test_set$prob but never
# creates it; predict.rpart on a classification tree returns one probability
# column per class (column 1 corresponds to the first level of DFREE).
test_set$prob <- predict(drug_model, newdata = test_set, type = 'prob')
ROCRpred <- prediction(test_set$prob[,1], test_set$DFREE)
ROCRperf <- performance(ROCRpred, 'tpr', 'fpr')
plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
abline(0, 1, lty = 8, col = 'grey')
AUCperf <- performance(ROCRpred, 'auc')
attr(AUCperf, 'y.values')
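# Added sketch (one possible way, not necessarily how the 0.27 below was chosen):
# the cut-off that maximises TPR - FPR (Youden's J) can be read directly from the
# ROCR performance object built above.
cutoffs <- ROCRperf@alpha.values[[1]]
j_stat  <- ROCRperf@y.values[[1]] - ROCRperf@x.values[[1]]
cutoffs[which.max(j_stat)]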
# Deciding the optimal cut-off point
test_set$pred_et <- ifelse(test_set$prob > 0.27, 1, 0)
test_set$pred_et <- as.factor(test_set$pred_et[,1])

# Confusion Matrix and efficiency (overall accuracy)
cm <- table(test_set$DFREE, test_set$pred_et)
cm
Efficiency <- sum(diag(cm)) / sum(cm)
Efficiency

# PCA

# Importing the dataset
dataset = read.csv('Wine.csv')

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
training_set[-14] = scale(training_set[-14])
test_set[-14] = scale(test_set[-14])

# Applying PCA
# install.packages('caret')
library(caret)
# install.packages('e1071')
library(e1071)
pca = preProcess(x = training_set[-14], method = 'pca', pcaComp = 2)
training_set = predict(pca, training_set)
training_set = training_set[c(2, 3, 1)]  # reorder columns so Customer_Segment comes last
test_set = predict(pca, test_set)
test_set = test_set[c(2, 3, 1)]

# Preparing the MovieLens ratings for the deep learning section
setwd("~/Documents/Udemy/Teaching/Deep Learning A-Z/Datasets/ml-1m")

# ratings.dat uses '::' as separator, so convert it before reading it as a table
dat <- readLines('ratings.dat')
dat <- gsub("::", " ", dat)
dat <- textConnection(dat)
dat <- read.table(dat)
View(dat)

library(caTools)
split = sample.split(dat$V1, SplitRatio = 0.75)
training_set = subset(dat, split == TRUE)
test_set = subset(dat, split == FALSE)
write.table(training_set, "training_set.dat", sep = "\t")
write.table(test_set, "test_set.dat", sep = "\t")

# Same split on the pre-converted CSV, with named columns
dataset = read.csv('ratings_bis.csv', header = FALSE)
colnames(dataset) <- c("User", "Movie", "Rating", "Timestamp")
split = sample.split(dataset$User, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")

# Re-reading the written files and dropping the index column added by write.csv
training_set = read.csv('training_set.csv')
test_set = read.csv('test_set.csv')
training_set = training_set[2:5]
test_set = test_set[2:5]
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
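# Added note: the index column cleaned up above comes from write.csv writing row
# names by default; passing row.names = FALSE avoids the extra round trip, e.g.
# write.csv(training_set, 'training_set.csv', row.names = FALSE)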
# Splitting ratings.csv into training and test files
dataset = read.csv('ratings.csv', header = FALSE)
colnames(dataset) = c("User", "Movie", "Rating", "Timestamp")
View(dataset)

library(caTools)
split = sample.split(dataset$User, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
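# Quick sanity check (added): roughly 75% of the ratings should end up in the
# training file.
nrow(training_set) / (nrow(training_set) + nrow(test_set))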