introduction-to-deep-learning/Le Deep Learning de A a Z/Part 5 - Boltzmann_Machines/ml-1m/.Rhistory

513 lines
18 KiB
R

# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
# Decision Tree Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset,
control = rpart.control(minsplit = 1))
# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
# Decision Tree Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset,
control = rpart.control(minsplit = 1))
# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
# Decision Tree Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset,
control = rpart.control(minsplit = 1))
# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
# Decision Tree Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset,
control = rpart.control(minsplit = 1))
# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
# Decision Tree Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset,
control = rpart.control(minsplit = 1))
# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
# Decision Tree Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset)
# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))
# Visualising the Decision Tree Regression results
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
colour = 'blue') +
ggtitle('Truth or Bluff (Decision Tree Regression)') +
xlab('Level') +
ylab('Salary')
# Visualising the Decision Tree Regression results
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
colour = 'blue') +
ggtitle('Truth or Bluff (Decision Tree Regression)') +
xlab('Level') +
ylab('Salary')
# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset,
control = rpart.control(minsplit = 1))
# Visualising the Decision Tree Regression results
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
colour = 'blue') +
ggtitle('Truth or Bluff (Decision Tree Regression)') +
xlab('Level') +
ylab('Salary')
# Visualising the Decision Tree Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Decision Tree Regression)') +
xlab('Level') +
ylab('Salary')
# Visualising the Decision Tree Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Decision Tree Regression)') +
xlab('Level') +
ylab('Salary')
y_pred = predict(regressor, data.frame(Level = 6.5))
setwd("~/Desktop/Machine Learning A-Z/Part 2 - Regression/Section 9 - Random Forest Regression")
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
View(dataset)
library(randomForest)
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 10)
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 100)
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')
y_pred = predict(regressor, data.frame(Level = 6.5))
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 10)
# Random Forest Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Random Forest Regression to the dataset
# install.packages('randomForest')
library(randomForest)
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 10)
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')
y_pred = predict(regressor, data.frame(Level = 6.5))
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 100)
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')
y_pred = predict(regressor, data.frame(Level = 6.5))
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 500)
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')
y_pred = predict(regressor, data.frame(Level = 6.5))
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
library(randomForest)
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 500)
# Fitting Random Forest Regression to the dataset
# install.packages('randomForest')
library(randomForest)
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 10)
# Random Forest Regression
# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
# Fitting Random Forest Regression to the dataset
# install.packages('randomForest')
library(randomForest)
set.seed(1234)
regressor = randomForest(x = dataset[1],
y = dataset$Salary,
ntree = 10)
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')
setwd("~/Desktop/Machine Learning A-Z/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------")
dataset = read.csv('Data.csv')
View(dataset)
dataset$Age = ifelse(is.na(dataset$Age),
ave(dataset$Age, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
dataset$Age)
dataset$Salary = ifelse(is.na(dataset$Salary),
ave(dataset$Salary, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
dataset$Salary)
dataset = read.csv('Data.csv')
setwd("~/Downloads/Drug Final/Drug Final")
drugdata <- read.csv("Drug_data.csv", header = T)
View(drugdata)
drug_model = rpart(formula = DFREE ~ ., data = training_set)
# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split <- sample.split(drugdata$DFREE, SplitRatio = 0.75)
training_set <- subset(drugdata,split == TRUE)
test_set <- subset(drugdata,split == FALSE)
library(rpart)
library(partykit)
drug_model = rpart(formula = DFREE ~ ., data = training_set)
plot(drug_model)
text(drug_model, pretty = 0)
library(ROCR)
ROCRpred <- prediction(test_set$prob[,1], test_set$DFREE)
ROCRperf <- performance(ROCRpred, 'tpr','fpr')
plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2,1.7))
abline(0,1, lty = 8, col = 'grey')
AUCperf <- performance(ROCRpred, 'auc')
attr(AUCperf, 'y.values')
# Deciding the optimal cut off point.
test_set$pred_et<- ifelse(test_set$prob > 0.27 , 1 ,0)
test_set$pred_et <- as.factor(test_set$pred_et[,1])
#Confusion Marix
cm <- table(test_set$DFREE, test_set$pred_et)
cm
Efficiency <- sum(diag(cm))/sum(cm)
Efficiency
drugdata <- read.csv("Drug_data.csv", header = T)
View(drugdata)
# PCA
# Importing the dataset
dataset = read.csv('Wine.csv')
# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
# Feature Scaling
training_set[-14] = scale(training_set[-14])
test_set[-14] = scale(test_set[-14])
# Applying PCA
# install.packages('caret')
library(caret)
# install.packages('e1071')
library(e1071)
pca = preProcess(x = training_set[-14], method = 'pca', pcaComp = 2)
training_set = predict(pca, training_set)
training_set = training_set[c(2, 3, 1)]
test_set = predict(pca, test_set)
test_set = test_set[c(2, 3, 1)]
a = 2
a = 2
dat <- readLines('ratings.dat')
dat <- gsub("::", " ", dat)
dat <- textConnection(dat)
dat <- read.table(dat)
setwd("~/Documents/Udemy/Teaching/Deep Learning A-Z/Datasets/ml-1m")
dat <- readLines('ratings.dat')
dat <- gsub("::", " ", dat)
dat <- textConnection(dat)
dat <- read.table(dat)
View(dat)
library(caTools)
split = sample.split(dataset$V1, SplitRatio = 0.75)
split = sample.split(dat$V1, SplitRatio = 0.75)
training_set = subset(dat, split == TRUE)
test_set = subset(dat, split == FALSE)
View(training_set)
View(test_set)
library("foreign", lib.loc="/Library/Frameworks/R.framework/Versions/3.3/Resources/library")
write.table(training_set, "training_set.dat", sep = "\t")
write.table(test_set, "test_set.dat", sep = "\t")
View(training_set)
write.table(training_set-2, "training_set.txt", sep = "\t")
dataset = read.csv('ratings_bis.csv', header = FALSE)
dataset = read.delim('ratings_bis.csv', header = FALSE, sep = '\t')
View(dataset)
dataset = read.csv('ratings_bis.csv', header = FALSE)
View(dataset)
library(caTools)
split = sample.split(dataset$V1, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
write.table(training_set, "training_set.csv")
write.table(test_set, "test_set.csv")
dataset = read.csv('ratings_bis.csv', header = FALSE)
View(dataset)
colnames(dataset) <- c("User","Movie","Rating","Timestamp")
library(caTools)
split = sample.split(dataset$User, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
View(training_set)
training_set = training_set
View(training_set)
training_set = read.csv('training_set.csv')
test_set = read.csv('test_set.csv')
training_set = training_set[1:5]
View(training_set)
training_set = read.csv('training_set.csv')
test_set = read.csv('test_set.csv')
training_set = training_set[2:5]
test_set = test_set[2:5]
View(training_set)
View(test_set)
training_set = read.csv('training_set.csv')
training_set = read.csv('training_set.csv')
test_set = read.csv('test_set.csv')
training_set = training_set[2:5]
test_set = test_set[2:5]
View(training_set)
View(test_set)
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
dataset = read.csv('ratings.csv', header = FALSE)
colnames(dataset) = c("User","Movie","Rating","Timestamp")
dataset = read.csv('ratings.csv', header = FALSE)
colnames(dataset) = c("User","Movie","Rating","Timestamp")
View(dataset)
library(caTools)
split = sample.split(dataset$User, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")