library(readxl)
# Read the "Hoja2" worksheet of the Excel workbook into `ageandheight`.
ageandheight <- read_excel(
  "/Users/songweizhi/ageandheight.xls",
  sheet = "Hoja2"
)
ageandheight

# Simple linear regression: height modelled on age alone.
lmHeight <- lm(height ~ age, data = ageandheight)
summary(lmHeight)

# Multiple linear regression: height modelled on age plus the
# `no_siblings` column.
lmHeight2 <- lm(height ~ age + no_siblings, data = ageandheight)
summary(lmHeight2)
# Scatter of height against age with the simple-regression line on top.
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)
abline(lmHeight)

# Residuals of the simple model, in observation order.
# A structureless cloud is the desired outcome; a visible pattern hints at
# a relationship the linear model does not capture.
plot(lmHeight$residuals, pch = 16, col = "red")

# Same scatter and regression line, drawn a second time.
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)
abline(lmHeight)

# Residual plot, drawn a second time (see the interpretation note above).
plot(lmHeight$residuals, pch = 16, col = "red")

# Scatter only, without the regression line.
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)
# Scatter plot of stopping distance against speed for the built-in `cars`
# data, with the smooth curve that scatter.smooth() adds.
scatter.smooth(x = cars$speed, y = cars$dist, main = "Dist ~ Speed")

# Split the graph area into 2 columns. par() returns the previous settings,
# which are kept so the layout can be put back once both panels are drawn.
old_par <- par(mfrow = c(1, 2))

# Box plot for 'speed'. The subtitle lists the values boxplot.stats() flags
# as outliers; collapsing them keeps the subtitle a single string even when
# several values are flagged.
speed_outliers <- paste(boxplot.stats(cars$speed)$out, collapse = ", ")
boxplot(
  cars$speed,
  main = "Speed",
  sub = paste("Outlier rows: ", speed_outliers)
)

# Box plot for 'distance', with the same kind of subtitle.
dist_outliers <- paste(boxplot.stats(cars$dist)$out, collapse = ", ")
boxplot(
  cars$dist,
  main = "Distance",
  sub = paste("Outlier rows: ", dist_outliers)
)

# Restore the previous layout so later plots are not squeezed into the
# two-column grid.
par(old_par)
# e1071 supplies skewness(), which is shown in the plot subtitles below.
library(e1071)

# Split the graph area into 2 columns, remembering the previous settings so
# they can be restored after both panels are drawn.
old_par <- par(mfrow = c(1, 2))

# Density plot for 'speed'. The estimate is computed once and reused for
# both the outline and the filled polygon.
speed_density <- density(cars$speed)
plot(
  speed_density,
  main = "Density Plot: Speed",
  ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(cars$speed), 2))
)
polygon(speed_density, col = "red")

# Density plot for 'dist', built the same way.
dist_density <- density(cars$dist)
plot(
  dist_density,
  main = "Density Plot: Distance",
  ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(cars$dist), 2))
)
polygon(dist_density, col = "red")

# Put the previous layout back so later plots use the full device again.
par(old_par)
# Correlation between speed and stopping distance in `cars`.
with(cars, cor(speed, dist))

# Fit distance on speed using every row of `cars`, then show the fit.
linearMod <- lm(dist ~ speed, data = cars)
print(linearMod)

# Summary statistics for the fitted model.
modelSummary <- summary(linearMod)
modelSummary

# Coefficient table taken from the summary object.
modelCoeffs <- coef(modelSummary)
modelCoeffs
library(readxl)
# Import the data: worksheet "Hoja2" of the Excel workbook.
ageandheight <- read_excel(
  "/Users/songweizhi/ageandheight.xls",
  sheet = "Hoja2"
)
ageandheight

# Simple linear regression of height on age.
lmHeight <- lm(height ~ age, data = ageandheight)
summary(lmHeight)

# Multiple linear regression of height on age and `no_siblings`.
lmHeight2 <- lm(height ~ age + no_siblings, data = ageandheight)
summary(lmHeight2)
# Scatter of height against age with the fitted regression line.
plot(height ~ age, data = ageandheight, pch = 16, col = "blue")
abline(lmHeight)

# Residuals of the simple model, in observation order.
# Ideally they look random; a visible pattern suggests structure that the
# linear model is not accounting for.
plot(lmHeight$residuals, pch = 16, col = "red")

# Scatter and regression line again.
plot(height ~ age, data = ageandheight, pch = 16, col = "blue")
abline(lmHeight)

# Residual plot again (same interpretation as above).
plot(lmHeight$residuals, pch = 16, col = "red")
library(readxl)
# Load worksheet "Hoja2" from the Excel workbook and display it.
ageandheight <- read_excel(
  "/Users/songweizhi/ageandheight.xls",
  sheet = "Hoja2"
)
ageandheight

# Model 1: height as a linear function of age.
lmHeight <- lm(height ~ age, data = ageandheight)
summary(lmHeight)

# Model 2: height as a linear function of age and `no_siblings`.
lmHeight2 <- lm(height ~ age + no_siblings, data = ageandheight)
summary(lmHeight2)
# Scatter of height against age (points only).
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)

# Points again, this time followed by the regression line.
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)
abline(lmHeight)

# Residuals of the simple model, in observation order.
# Randomly scattered residuals are the ideal; a pattern would point to
# structure that the linear model leaves unexplained.
plot(lmHeight$residuals, pch = 16, col = "red")

# Points plus regression line.
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)
abline(lmHeight)

# Points plus regression line, once more.
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)
abline(lmHeight)
# Summarise the simple model once and pull the headline numbers out of it.
regression_summary <- summary(lmHeight)

# Note: despite the short name, this is the *adjusted* R-squared.
r2 <- regression_summary[["adj.r.squared"]]
r2

regression_summary

# Row 2, column 4 of the coefficient table: for `height ~ age` that is the
# p-value (Pr(>|t|)) of the age coefficient.
p_value <- coef(regression_summary)[2, 4]
p_value
# Scatter of height against age, then the fitted regression line.
plot(height ~ age, data = ageandheight, pch = 16, col = "blue")
abline(lmHeight)

# Values shown in the legend: the adjusted R-squared of the model and the
# p-value of the age coefficient (row 2, column 4 of the coefficient table).
regression_summary <- summary(lmHeight)
r2 <- regression_summary$adj.r.squared
p_value <- regression_summary$coefficients[2, 4]

# Build the two plotmath labels. bquote() splices the formatted numbers into
# the expressions so the legend renders "R^2 = ..." and "p = ..." in italics.
rp <- vector("expression", 2)
rp[[1]] <- bquote(italic(R)^2 == .(format(r2, digits = 3)))
rp[[2]] <- bquote(italic(p) == .(format(p_value, digits = 2)))
legend("topright", legend = rp, bty = "n")

# Residuals of the simple model, in observation order.
# Ideally they look random; a visible pattern suggests structure that the
# linear model is not accounting for.
plot(lmHeight$residuals, pch = 16, col = "red")
# Scatter of height against age, then the fitted regression line.
plot(height ~ age, data = ageandheight, pch = 16, col = "blue")
abline(lmHeight)

# Values shown in the legend: the adjusted R-squared of the model and the
# p-value of the age coefficient (row 2, column 4 of the coefficient table).
regression_summary <- summary(lmHeight)
r2 <- regression_summary$adj.r.squared
p_value <- regression_summary$coefficients[2, 4]

# Build the two plotmath labels with the formatted numbers spliced in, and
# place them in the top-right corner without a surrounding box.
rp <- vector("expression", 2)
rp[[1]] <- bquote(italic(R)^2 == .(format(r2, digits = 3)))
rp[[2]] <- bquote(italic(p) == .(format(p_value, digits = 2)))
legend("topright", legend = rp, bty = "n")
# Scatter of height against age, then the fitted regression line.
plot(height ~ age, data = ageandheight, pch = 16, col = "blue")
abline(lmHeight)

# Values shown in the legend: the adjusted R-squared of the model and the
# p-value of the age coefficient (row 2, column 4 of the coefficient table).
regression_summary <- summary(lmHeight)
r2 <- regression_summary$adj.r.squared
p_value <- regression_summary$coefficients[2, 4]

# Build the two plotmath labels with the formatted numbers spliced in, and
# place them in the top-left corner without a surrounding box.
rp <- vector("expression", 2)
rp[[1]] <- bquote(italic(R)^2 == .(format(r2, digits = 3)))
rp[[2]] <- bquote(italic(p) == .(format(p_value, digits = 2)))
legend("topleft", legend = rp, bty = "n")
# Draw the observations, then overlay the fitted regression line.
plot(
  height ~ age,
  data = ageandheight,
  pch = 16,
  col = "blue"
)
abline(lmHeight)

# Numbers for the legend: adjusted R-squared and the p-value stored in
# row 2, column 4 of the coefficient table.
regression_summary <- summary(lmHeight)
r2 <- regression_summary$adj.r.squared
p_value <- regression_summary$coefficients[2, 4]

# Two plotmath labels, each with its formatted number spliced in, shown in
# the top-left corner without a box.
rp <- vector("expression", 2)
rp[[1]] <- bquote(italic(R)^2 == .(format(r2, digits = 3)))
rp[[2]] <- bquote(italic(p) == .(format(p_value, digits = 2)))
legend("topleft", legend = rp, bty = "n")
# Install required R packages
#install.packages("dplyr")
#install.packages("ggpubr")
library("dplyr")
library("ggpubr")
# Work on a copy of the built-in ToothGrowth data set and display it.
my_data <- ToothGrowth
my_data

# Show 10 randomly chosen rows; the seed makes the draw repeatable.
set.seed(1234)
dplyr::sample_n(my_data, 10)

# Visual check 1: density plot of tooth length.
library("ggpubr")
ggdensity(
  my_data$len,
  main = "Density plot of tooth length",
  xlab = "Tooth length"
)

# Visual check 2: Q-Q plot of tooth length.
library("ggpubr")
ggqqplot(my_data$len)

# Formal check: Shapiro-Wilk normality test on tooth length.
shapiro.test(my_data$len)
