513 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			513 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
# Predict the response for one new observation with Level = 6.5.
# `regressor` has to exist already; it is not created before this point in
# the visible part of the file.
y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Decision Tree Regression ----
# The original ran this exact script five times back to back. Every run
# re-reads the same file and refits the same model, so a single pass leaves
# the same `dataset`, `regressor` and `y_pred` behind.

# Importing the dataset: keep columns 2 and 3 only (referred to below as
# Level and Salary).
dataset <- read.csv('Position_Salaries.csv')
dataset <- dataset[2:3]

# Splitting the dataset into the Training set and Test set
# (disabled in the original; kept here as a reference)
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling (disabled in the original)
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
# minsplit = 1 lets rpart attempt a split on nodes of any size.
regressor <- rpart(
  formula = Salary ~ .,
  data = dataset,
  control = rpart.control(minsplit = 1)
)

# Predicting a new result for Level = 6.5
y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Decision Tree Regression (default rpart control) ----

# Importing the dataset: only columns 2 and 3 are kept.
dataset <- read.csv('Position_Salaries.csv')
dataset <- dataset[2:3]

# Splitting the dataset into the Training set and Test set
# (left disabled, exactly as in the original)
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling (left disabled)
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
# No `control` argument is given here, so rpart's default stopping rules are
# used for this fit.
regressor <- rpart(formula = Salary ~ ., data = dataset)

# Predicting a new result for Level = 6.5
y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Visualising the Decision Tree Regression results ----
# The original drew this identical plot twice in a row; it is drawn once
# here. Columns are mapped through the `data` argument instead of
# `dataset$...` inside aes().
# install.packages('ggplot2')
library(ggplot2)
ggplot(dataset, aes(x = Level, y = Salary)) +
  # Observed (Level, Salary) pairs.
  geom_point(colour = 'red') +
  # Model predictions at the observed levels, joined by a line.
  geom_line(aes(y = predict(regressor, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Decision Tree Regression)') +
  xlab('Level') +
  ylab('Salary')
 | 
						|
# Refit the decision tree on `dataset`, this time with minsplit = 1 passed
# through rpart.control().
# install.packages('rpart')
library(rpart)
regressor <- rpart(
  formula = Salary ~ .,
  data = dataset,
  control = rpart.control(minsplit = 1)
)
 | 
						|
# Plot the observed points (red) and the model's predictions at the observed
# levels (blue line).
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Decision Tree Regression)',
    x = 'Level',
    y = 'Salary'
  )
 | 
						|
# Higher-resolution plot: predict on a grid of levels spaced 0.1 apart
# instead of only at the observed levels.
# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.1)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Decision Tree Regression)',
    x = 'Level',
    y = 'Salary'
  )
 | 
						|
# Same high-resolution plot with a finer grid (step 0.01), followed by the
# single-point prediction at Level = 6.5.
# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.01)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Decision Tree Regression)',
    x = 'Level',
    y = 'Salary'
  )

y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Random Forest session setup ----
# Switch to the course folder so the relative read.csv() path resolves.
setwd("~/Desktop/Machine Learning A-Z/Part 2 - Regression/Section 9 - Random Forest Regression")

# Importing the dataset: keep columns 2 and 3 and open them in the viewer.
dataset <- read.csv('Position_Salaries.csv')
dataset <- dataset[2:3]
View(dataset)

# Fit a 10-tree random forest. dataset[1] stays a one-column data frame
# (the predictor); the response is passed as a plain vector.
library(randomForest)
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 10
)
 | 
						|
# Random Forest plots on a dense grid of levels: first with step 0.1, then
# again with step 0.01.
# install.packages('ggplot2')
library(ggplot2)

# Grid step 0.1
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.1)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Random Forest Regression)',
    x = 'Level',
    y = 'Salary'
  )

# Grid step 0.01
# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.01)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Random Forest Regression)',
    x = 'Level',
    y = 'Salary'
  )
 | 
						|
# Refit with 100 trees (same seed as the other fits), redraw the dense-grid
# plot and predict at Level = 6.5.
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 100
)

# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.01)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Random Forest Regression)',
    x = 'Level',
    y = 'Salary'
  )

y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Back to a 10-tree forest, seeded so the fit can be repeated.
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 10
)
 | 
						|
# Random Forest Regression ----

# Importing the dataset: only columns 2 and 3 are kept.
dataset <- read.csv('Position_Salaries.csv')
dataset <- dataset[2:3]

# Splitting the dataset into the Training set and Test set
# (left disabled, exactly as in the original)
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling (left disabled)
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Random Forest Regression to the dataset (10 trees, seeded)
# install.packages('randomForest')
library(randomForest)
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 10
)

# Visualising the results on a dense grid of levels (step 0.01)
# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.01)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Random Forest Regression)',
    x = 'Level',
    y = 'Salary'
  )

# Predicting a new result for Level = 6.5
y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Refit with 100 trees. Every other randomForest fit in this file is preceded
# by set.seed(1234); this one was not, so its result changed from run to run.
# Seeding it makes the fit repeatable and consistent with the others.
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 100
)
 | 
						|
# Dense-grid plot (step 0.01) of the current forest, then the single-point
# prediction at Level = 6.5.
# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.01)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Random Forest Regression)',
    x = 'Level',
    y = 'Salary'
  )

y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Refit with 500 trees (seeded), redraw the dense-grid plot and predict at
# Level = 6.5.
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 500
)

# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.01)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Random Forest Regression)',
    x = 'Level',
    y = 'Salary'
  )

y_pred <- predict(regressor, data.frame(Level = 6.5))
 | 
						|
# Importing the dataset: only columns 2 and 3 are kept.
dataset <- read.csv('Position_Salaries.csv')
dataset <- dataset[2:3]

# First fit: 500 trees, seeded.
library(randomForest)
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 500
)

# Second fit replaces the first: 10 trees, same seed.
# install.packages('randomForest')
library(randomForest)
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 10
)
 | 
						|
# Random Forest Regression ----

# Importing the dataset: only columns 2 and 3 are kept.
dataset <- read.csv('Position_Salaries.csv')
dataset <- dataset[2:3]

# Splitting the dataset into the Training set and Test set
# (left disabled, exactly as in the original)
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling (left disabled)
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Random Forest Regression to the dataset (10 trees, seeded)
# install.packages('randomForest')
library(randomForest)
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset$Salary,
  ntree = 10
)

# Visualising the results on a dense grid of levels (step 0.01)
# install.packages('ggplot2')
library(ggplot2)
x_grid <- seq(from = min(dataset$Level), to = max(dataset$Level), by = 0.01)
ggplot() +
  geom_point(
    aes(x = dataset$Level, y = dataset$Salary),
    colour = 'red'
  ) +
  geom_line(
    aes(x = x_grid,
        y = predict(regressor, newdata = data.frame(Level = x_grid))),
    colour = 'blue'
  ) +
  labs(
    title = 'Truth or Bluff (Random Forest Regression)',
    x = 'Level',
    y = 'Salary'
  )
 | 
						|
# Data preprocessing: fill missing Age / Salary values ----
setwd("~/Desktop/Machine Learning A-Z/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------")
dataset <- read.csv('Data.csv')
View(dataset)

# Replace each missing Age with the mean Age of the rows that share the same
# Country (NAs are ignored when the mean is computed).
dataset$Age <- ifelse(
  is.na(dataset$Age),
  ave(dataset$Age, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
  dataset$Age
)

# Same per-Country mean fill for Salary.
dataset$Salary <- ifelse(
  is.na(dataset$Salary),
  ave(dataset$Salary, dataset$Country, FUN = function(x) mean(x, na.rm = TRUE)),
  dataset$Salary
)

# Reloading the file replaces `dataset`, so the filled-in columns above are
# discarded from this point on.
dataset <- read.csv('Data.csv')
 | 
						|
# Drug data: load, split, fit and draw a decision tree ----
setwd("~/Downloads/Drug Final/Drug Final")
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
drugdata <- read.csv("Drug_data.csv", header = TRUE)
View(drugdata)

# The original called rpart() on `training_set` here, before the split below
# had created it. That premature call is removed; the model is fitted once,
# after the split.

# Splitting the dataset into the Training set and Test set (75% / 25%)
# install.packages('caTools')
library(caTools)
set.seed(123)
split <- sample.split(drugdata$DFREE, SplitRatio = 0.75)
training_set <- subset(drugdata, split == TRUE)
test_set <- subset(drugdata, split == FALSE)

# Fit a tree for DFREE using every other column as a predictor.
library(rpart)
library(partykit)
drug_model <- rpart(formula = DFREE ~ ., data = training_set)

# Draw the tree and label its nodes.
plot(drug_model)
text(drug_model, pretty = 0)
 | 
						|
# ROC curve, AUC and confusion matrix for the drug model ----
library(ROCR)

# `test_set$prob` is indexed as a matrix below but was never assigned in the
# original, so the prediction() call could not run. It is computed here.
# NOTE(review): type = 'prob' only works when drug_model is a classification
# tree (factor DFREE or method = 'class') - confirm against the data.
test_set$prob <- predict(drug_model, newdata = test_set, type = 'prob')

# Column 1 of the probability matrix is used as the score, as in the original.
# NOTE(review): confirm that column 1 is the class of interest.
ROCRpred <- prediction(test_set$prob[, 1], test_set$DFREE)
ROCRperf <- performance(ROCRpred, 'tpr', 'fpr')
plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
# Diagonal reference line (intercept 0, slope 1).
abline(0, 1, lty = 8, col = 'grey')

# Area under the ROC curve.
AUCperf <- performance(ROCRpred, 'auc')
attr(AUCperf, 'y.values')

# Deciding the optimal cut off point: scores above 0.27 are labelled 1.
test_set$pred_et <- ifelse(test_set$prob > 0.27, 1, 0)
test_set$pred_et <- as.factor(test_set$pred_et[, 1])

# Confusion matrix: actual DFREE (rows) against predicted label (columns).
cm <- table(test_set$DFREE, test_set$pred_et)
cm

# Share of test rows on the diagonal of the confusion matrix.
Efficiency <- sum(diag(cm)) / sum(cm)
Efficiency
 | 
						|
# Reload the drug data from disk and open it in the data viewer.
drugdata <- read.csv(file = "Drug_data.csv", header = T)
View(drugdata)
 | 
						|
# PCA ----

# Importing the dataset
dataset <- read.csv('Wine.csv')

# Splitting the dataset into the Training set and Test set (80% / 20%)
# install.packages('caTools')
library(caTools)
set.seed(123)
split <- sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# Feature Scaling (every column except column 14).
# The original scaled the test set with its own mean and standard deviation,
# which puts the two sets on different scales. The test set is now
# standardised with the centre and scale estimated from the training set.
training_scaled <- scale(training_set[-14])
training_set[-14] <- training_scaled
test_set[-14] <- scale(
  test_set[-14],
  center = attr(training_scaled, 'scaled:center'),
  scale = attr(training_scaled, 'scaled:scale')
)
 | 
						|
# Applying PCA ----
# install.packages('caret')
library(caret)
# install.packages('e1071')
library(e1071)

# Learn a 2-component PCA transform from the training predictors
# (every column except column 14).
pca <- preProcess(x = training_set[-14], method = 'pca', pcaComp = 2)

# Apply the transform to both sets, then reorder the resulting columns to
# (2, 3, 1).
training_set <- predict(pca, training_set)
training_set <- training_set[c(2, 3, 1)]

test_set <- predict(pca, test_set)
test_set <- test_set[c(2, 3, 1)]
 | 
						|
# Scratch assignment, entered twice in the original session.
a <- 2
a <- 2
 | 
						|
# Import the '::'-delimited ratings file ----
# The original first tried to read 'ratings.dat' before switching to the
# dataset folder, then repeated the import after setwd(). The directory is
# set first here and the file is read once.
setwd("~/Documents/Udemy/Teaching/Deep Learning A-Z/Datasets/ml-1m")

ratings_lines <- readLines('ratings.dat')
# Turn the two-character '::' separator into a single space so read.table()
# can split the fields. fixed = TRUE matches the literal string.
ratings_lines <- gsub("::", " ", ratings_lines, fixed = TRUE)
# read.table(text = ...) manages its own connection. The original overwrote
# its textConnection handle with the parsed table and never closed it.
dat <- read.table(text = ratings_lines)
View(dat)
 | 
						|
# Split the ratings table 75% / 25% ----
library(caTools)
# The original first called sample.split() on `dataset$V1`, i.e. on the wrong
# object (the ratings live in `dat`), and then repeated the call on `dat`.
# Only the call on `dat` is kept. The seed makes the split repeatable, as in
# the other seeded splits in this file.
set.seed(123)
split <- sample.split(dat$V1, SplitRatio = 0.75)
training_set <- subset(dat, split == TRUE)
test_set <- subset(dat, split == FALSE)
View(training_set)
View(test_set)
 | 
						|
# Export the two sets as tab-separated files ----
# The original pinned `lib.loc` to the R 3.3 framework path of one machine,
# which fails on any other install. The default library search path is used
# instead.
library("foreign")

write.table(training_set, "training_set.dat", sep = "\t")
write.table(test_set, "test_set.dat", sep = "\t")
View(training_set)

# NOTE(review): `training_set - 2` subtracts 2 from every cell before
# writing. Kept as in the original; confirm that this is intended.
write.table(training_set - 2, "training_set.txt", sep = "\t")
 | 
						|
# Import ratings_bis.csv, split it 75% / 25% and export both parts ----
# The original read the file as CSV, then as tab-delimited, then as CSV
# again; only the last result was used, so the file is read once here.
dataset <- read.csv('ratings_bis.csv', header = FALSE)
View(dataset)

library(caTools)
# Seeded so the split is repeatable, as in the other seeded splits in this
# file.
set.seed(123)
split <- sample.split(dataset$V1, SplitRatio = 0.75)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# The original used write.table() with its default separator, which put
# space-delimited text into files named '.csv'. write.csv() produces real
# comma-separated output.
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
 | 
						|
# Import ratings_bis.csv with named columns, split and export ----
dataset <- read.csv('ratings_bis.csv', header = FALSE)
View(dataset)
colnames(dataset) <- c("User", "Movie", "Rating", "Timestamp")

library(caTools)
# Seeded so the split is repeatable, as in the other seeded splits in this
# file.
set.seed(123)
split <- sample.split(dataset$User, SplitRatio = 0.75)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# The original wrote each file twice in a row with identical content; one
# write per file is enough. Row names are still written as the first column,
# as before.
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
 | 
						|
# Read the exported sets back, drop the first column and write them out ----
# The original reached this state through a self-assignment that did nothing
# (`training_set = training_set`) and several repeated reads of the same two
# files. Only the steps that determine the final result are kept.
training_set <- read.csv('training_set.csv')
test_set <- read.csv('test_set.csv')

# Keep columns 2 to 5, i.e. drop the first column of each file.
training_set <- training_set[2:5]
test_set <- test_set[2:5]
View(training_set)
View(test_set)

# NOTE(review): write.csv() writes row names as a leading column by default,
# so the files get an extra first column again. Kept as in the original;
# pass row.names = FALSE if that column is unwanted.
write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
 | 
						|
# Import ratings.csv, split it 75% / 25% by User and export both parts ----
# The original read the file and set the column names twice in a row; once is
# enough.
dataset <- read.csv('ratings.csv', header = FALSE)
colnames(dataset) <- c("User", "Movie", "Rating", "Timestamp")
View(dataset)

library(caTools)
# Seeded so the split is repeatable, as in the other seeded splits in this
# file.
set.seed(123)
split <- sample.split(dataset$User, SplitRatio = 0.75)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

write.csv(training_set, "training_set.csv")
write.csv(test_set, "test_set.csv")
 |