513 lines
18 KiB
R
513 lines
18 KiB
R
# Predicting a new result
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Decision Tree Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Decision Tree Regression to the dataset
|
|
# install.packages('rpart')
|
|
library(rpart)
|
|
regressor = rpart(formula = Salary ~ .,
|
|
data = dataset,
|
|
control = rpart.control(minsplit = 1))
|
|
# Predicting a new result
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Decision Tree Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Decision Tree Regression to the dataset
|
|
# install.packages('rpart')
|
|
library(rpart)
|
|
regressor = rpart(formula = Salary ~ .,
|
|
data = dataset,
|
|
control = rpart.control(minsplit = 1))
|
|
# Predicting a new result
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Decision Tree Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Decision Tree Regression to the dataset
|
|
# install.packages('rpart')
|
|
library(rpart)
|
|
regressor = rpart(formula = Salary ~ .,
|
|
data = dataset,
|
|
control = rpart.control(minsplit = 1))
|
|
# Predicting a new result
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Decision Tree Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Decision Tree Regression to the dataset
|
|
# install.packages('rpart')
|
|
library(rpart)
|
|
regressor = rpart(formula = Salary ~ .,
|
|
data = dataset,
|
|
control = rpart.control(minsplit = 1))
|
|
# Predicting a new result
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Decision Tree Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Decision Tree Regression to the dataset
|
|
# install.packages('rpart')
|
|
library(rpart)
|
|
regressor = rpart(formula = Salary ~ .,
|
|
data = dataset,
|
|
control = rpart.control(minsplit = 1))
|
|
# Predicting a new result
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Decision Tree Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Decision Tree Regression to the dataset
|
|
# install.packages('rpart')
|
|
library(rpart)
|
|
regressor = rpart(formula = Salary ~ .,
|
|
data = dataset)
|
|
# Predicting a new result
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Visualising the Decision Tree Regression results
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Decision Tree Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
# Visualising the Decision Tree Regression results
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Decision Tree Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
# Fitting Decision Tree Regression to the dataset
|
|
# install.packages('rpart')
|
|
library(rpart)
|
|
regressor = rpart(formula = Salary ~ .,
|
|
data = dataset,
|
|
control = rpart.control(minsplit = 1))
|
|
# Visualising the Decision Tree Regression results
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Decision Tree Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
# Visualising the Decision Tree Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Decision Tree Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
# Visualising the Decision Tree Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Decision Tree Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
setwd("~/Desktop/Machine Learning A-Z/Part 2 - Regression/Section 9 - Random Forest Regression")
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
View(dataset)
|
|
library(randomForest)
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 10)
|
|
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Random Forest Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Random Forest Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 100)
|
|
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Random Forest Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 10)
|
|
# Random Forest Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Random Forest Regression to the dataset
|
|
# install.packages('randomForest')
|
|
library(randomForest)
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 10)
|
|
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Random Forest Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 100)
|
|
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Random Forest Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 500)
|
|
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Random Forest Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
y_pred = predict(regressor, data.frame(Level = 6.5))
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
library(randomForest)
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 500)
|
|
# Fitting Random Forest Regression to the dataset
|
|
# install.packages('randomForest')
|
|
library(randomForest)
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 10)
|
|
# Random Forest Regression
|
|
# Importing the dataset
|
|
dataset = read.csv('Position_Salaries.csv')
|
|
dataset = dataset[2:3]
|
|
# Splitting the dataset into the Training set and Test set
|
|
# # install.packages('caTools')
|
|
# library(caTools)
|
|
# set.seed(123)
|
|
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
|
|
# training_set = subset(dataset, split == TRUE)
|
|
# test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
# training_set = scale(training_set)
|
|
# test_set = scale(test_set)
|
|
# Fitting Random Forest Regression to the dataset
|
|
# install.packages('randomForest')
|
|
library(randomForest)
|
|
set.seed(1234)
|
|
regressor = randomForest(x = dataset[1],
|
|
y = dataset$Salary,
|
|
ntree = 10)
|
|
# Visualising the Random Forest Regression results (for higher resolution and smoother curve)
|
|
# install.packages('ggplot2')
|
|
library(ggplot2)
|
|
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
|
|
ggplot() +
|
|
geom_point(aes(x = dataset$Level, y = dataset$Salary),
|
|
colour = 'red') +
|
|
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
|
|
colour = 'blue') +
|
|
ggtitle('Truth or Bluff (Random Forest Regression)') +
|
|
xlab('Level') +
|
|
ylab('Salary')
|
|
setwd("~/Desktop/Machine Learning A-Z/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------")
|
|
dataset = read.csv('Data.csv')
|
|
View(dataset)
|
|
dataset$Age = ifelse(is.na(dataset$Age),
|
|
ave(dataset$Age, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
|
|
dataset$Age)
|
|
dataset$Salary = ifelse(is.na(dataset$Salary),
|
|
ave(dataset$Salary, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
|
|
dataset$Salary)
|
|
dataset = read.csv('Data.csv')
|
|
setwd("~/Downloads/Drug Final/Drug Final")
|
|
drugdata <- read.csv("Drug_data.csv", header = T)
|
|
View(drugdata)
|
|
drug_model = rpart(formula = DFREE ~ ., data = training_set)
|
|
# Splitting the dataset into the Training set and Test set
|
|
# install.packages('caTools')
|
|
library(caTools)
|
|
set.seed(123)
|
|
split <- sample.split(drugdata$DFREE, SplitRatio = 0.75)
|
|
training_set <- subset(drugdata,split == TRUE)
|
|
test_set <- subset(drugdata,split == FALSE)
|
|
library(rpart)
|
|
library(partykit)
|
|
drug_model = rpart(formula = DFREE ~ ., data = training_set)
|
|
plot(drug_model)
|
|
text(drug_model, pretty = 0)
|
|
library(ROCR)
|
|
ROCRpred <- prediction(test_set$prob[,1], test_set$DFREE)
|
|
ROCRperf <- performance(ROCRpred, 'tpr','fpr')
|
|
plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2,1.7))
|
|
abline(0,1, lty = 8, col = 'grey')
|
|
AUCperf <- performance(ROCRpred, 'auc')
|
|
attr(AUCperf, 'y.values')
|
|
# Deciding the optimal cut off point.
|
|
test_set$pred_et<- ifelse(test_set$prob > 0.27 , 1 ,0)
|
|
test_set$pred_et <- as.factor(test_set$pred_et[,1])
|
|
#Confusion Marix
|
|
cm <- table(test_set$DFREE, test_set$pred_et)
|
|
cm
|
|
Efficiency <- sum(diag(cm))/sum(cm)
|
|
Efficiency
|
|
drugdata <- read.csv("Drug_data.csv", header = T)
|
|
View(drugdata)
|
|
# PCA
|
|
# Importing the dataset
|
|
dataset = read.csv('Wine.csv')
|
|
# Splitting the dataset into the Training set and Test set
|
|
# install.packages('caTools')
|
|
library(caTools)
|
|
set.seed(123)
|
|
split = sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
|
|
training_set = subset(dataset, split == TRUE)
|
|
test_set = subset(dataset, split == FALSE)
|
|
# Feature Scaling
|
|
training_set[-14] = scale(training_set[-14])
|
|
test_set[-14] = scale(test_set[-14])
|
|
# Applying PCA
|
|
# install.packages('caret')
|
|
library(caret)
|
|
# install.packages('e1071')
|
|
library(e1071)
|
|
pca = preProcess(x = training_set[-14], method = 'pca', pcaComp = 2)
|
|
training_set = predict(pca, training_set)
|
|
training_set = training_set[c(2, 3, 1)]
|
|
test_set = predict(pca, test_set)
|
|
test_set = test_set[c(2, 3, 1)]
|
|
a = 2
|
|
a = 2
|
|
dat <- readLines('ratings.dat')
|
|
dat <- gsub("::", " ", dat)
|
|
dat <- textConnection(dat)
|
|
dat <- read.table(dat)
|
|
setwd("~/Documents/Udemy/Teaching/Deep Learning A-Z/Datasets/ml-1m")
|
|
dat <- readLines('ratings.dat')
|
|
dat <- gsub("::", " ", dat)
|
|
dat <- textConnection(dat)
|
|
dat <- read.table(dat)
|
|
View(dat)
|
|
library(caTools)
|
|
split = sample.split(dataset$V1, SplitRatio = 0.75)
|
|
split = sample.split(dat$V1, SplitRatio = 0.75)
|
|
training_set = subset(dat, split == TRUE)
|
|
test_set = subset(dat, split == FALSE)
|
|
View(training_set)
|
|
View(test_set)
|
|
library("foreign", lib.loc="/Library/Frameworks/R.framework/Versions/3.3/Resources/library")
|
|
write.table(training_set, "training_set.dat", sep = "\t")
|
|
write.table(test_set, "test_set.dat", sep = "\t")
|
|
View(training_set)
|
|
write.table(training_set-2, "training_set.txt", sep = "\t")
|
|
dataset = read.csv('ratings_bis.csv', header = FALSE)
|
|
dataset = read.delim('ratings_bis.csv', header = FALSE, sep = '\t')
|
|
View(dataset)
|
|
dataset = read.csv('ratings_bis.csv', header = FALSE)
|
|
View(dataset)
|
|
library(caTools)
|
|
split = sample.split(dataset$V1, SplitRatio = 0.75)
|
|
training_set = subset(dataset, split == TRUE)
|
|
test_set = subset(dataset, split == FALSE)
|
|
write.table(training_set, "training_set.csv")
|
|
write.table(test_set, "test_set.csv")
|
|
dataset = read.csv('ratings_bis.csv', header = FALSE)
|
|
View(dataset)
|
|
colnames(dataset) <- c("User","Movie","Rating","Timestamp")
|
|
library(caTools)
|
|
split = sample.split(dataset$User, SplitRatio = 0.75)
|
|
training_set = subset(dataset, split == TRUE)
|
|
test_set = subset(dataset, split == FALSE)
|
|
write.csv(training_set, "training_set.csv")
|
|
write.csv(test_set, "test_set.csv")
|
|
write.csv(training_set, "training_set.csv")
|
|
write.csv(test_set, "test_set.csv")
|
|
View(training_set)
|
|
training_set = training_set
|
|
View(training_set)
|
|
training_set = read.csv('training_set.csv')
|
|
test_set = read.csv('test_set.csv')
|
|
training_set = training_set[1:5]
|
|
View(training_set)
|
|
training_set = read.csv('training_set.csv')
|
|
test_set = read.csv('test_set.csv')
|
|
training_set = training_set[2:5]
|
|
test_set = test_set[2:5]
|
|
View(training_set)
|
|
View(test_set)
|
|
training_set = read.csv('training_set.csv')
|
|
training_set = read.csv('training_set.csv')
|
|
test_set = read.csv('test_set.csv')
|
|
training_set = training_set[2:5]
|
|
test_set = test_set[2:5]
|
|
View(training_set)
|
|
View(test_set)
|
|
write.csv(training_set, "training_set.csv")
|
|
write.csv(test_set, "test_set.csv")
|
|
dataset = read.csv('ratings.csv', header = FALSE)
|
|
colnames(dataset) = c("User","Movie","Rating","Timestamp")
|
|
dataset = read.csv('ratings.csv', header = FALSE)
|
|
colnames(dataset) = c("User","Movie","Rating","Timestamp")
|
|
View(dataset)
|
|
library(caTools)
|
|
split = sample.split(dataset$User, SplitRatio = 0.75)
|
|
training_set = subset(dataset, split == TRUE)
|
|
test_set = subset(dataset, split == FALSE)
|
|
write.csv(training_set, "training_set.csv")
|
|
write.csv(test_set, "test_set.csv")
|