# 12
#
# Dec 17
#
# Bacheca » Script R

# The dataset contains five columns:
# Ambient Temperature (AT); Ambient Pressure (AP)
# Relative Humidity (RH); Exhaust Vacuum (V, the column name used in the model formula)
# Net hourly electrical energy output (PE) of the plant (the response, y)
# READING DATA AND SPLITTING
# Point the session at the course data folder, then attach the Excel
# packages. The statement order is the same as before: the working
# directory is changed first, the packages are attached afterwards.
# NOTE(review): hard-coded absolute path; the script only runs on a machine
# where this folder exists.
wd <- "C:/data/EnergyCourse"
setwd(wd)
library(xlsx)
library(rJava)
library(xlsxjars)

# Read sheet 1 of the workbook (path is relative to the working directory
# set above) and preview the first rows.
powerData <- read.xlsx(file = "Folds5x2_pp.xlsx", sheetIndex = 1)
head(powerData)

# Split the data into a training set (75% of the rows) and a testing set
# (the remaining 25%). The seed is fixed so the split is reproducible.
set.seed(123)
# NOTE: `split` is also the name of a base R function; the variable name is
# kept unchanged because code elsewhere may refer to it.
split <- sample(seq_len(nrow(powerData)), size = floor(0.75 * nrow(powerData)))
trainData <- powerData[split, ]
# Select the test rows by positive index rather than `powerData[-split, ]`:
# when `split` is empty (data with fewer than two rows), negative indexing
# with a zero-length vector selects no rows at all instead of every row.
# For non-empty `split` the selected rows and their order are the same.
testData <- powerData[setdiff(seq_len(nrow(powerData)), split), ]
head(trainData)
head(testData)

# Characteristics of the two data sets: per-column summaries and
# dimensions (rows, columns) of the training set and of the testing set
summary(trainData)
dim(trainData)
summary(testData)
dim(testData)

# Build the predictive (regression) model: a linear model of PE on the
# four predictors AT, V, AP and RH, fitted on the training set only
predictionModel <- lm(
  formula = PE ~ AT + V + AP + RH,
  data = trainData
)
summary(predictionModel)

# Apply the fitted model to the testing set and compare the first few
# predictions against the first few observed responses
prediction <- predict(object = predictionModel, newdata = testData)
head(prediction)        # predicted values
head(testData[["PE"]])  # observed values

# R^2 on the test set: one minus the ratio of the sum of squared
# prediction errors (SSE) to the total sum of squares of PE around its
# test-set mean (SST)
SSE <- sum((testData[["PE"]] - prediction)^2)
SST <- sum((testData[["PE"]] - mean(testData[["PE"]]))^2)
1 - SSE / SST