# Answers to some class 1 questions
# as shown by examples in R
###############
# Q-Q plots
# The command 'qqnorm' compares your data to the quantiles expected from a
# normal distribution for a sample of the same size
# Do a Q-Q plot comparing energy$lean to a normal distribution
qqnorm(energy$lean, pch = 19, cex = 2)
# Get the normal quantiles that qqnorm computes for the plot.
# plot.it = FALSE returns them without drawing a second, unstyled plot
# over the one made above.
norm.data <- qqnorm(energy$lean, plot.it = FALSE)$x
# Take a look at it
norm.data
# Note that its median is 0 and stddev ~1:
median(norm.data) # 0
sd(norm.data) # 0.9910767
# Redo the same plot by hand, by entering x-values (norm.data) and y-values (energy$lean)
plot(norm.data, energy$lean, pch = 19, cex = 2)
# See what a perfect match looks like by plotting the data against itself.
# Any dataset compared with itself puts every point on the diagonal; normally
# distributed data would come close to this pattern in the plots above.
qqplot(energy$lean, energy$lean, pch = 19, cex = 2)
################
# The Shapiro-Wilk test of normality
# Null hypothesis: the sample dataset comes from a normally distributed
# population, so a small p-value is evidence against normality.
# First the observed data (recorded p-value is just under 0.05) ...
shapiro.test(energy$lean) # p-value = 0.04818
# ... then the normal quantiles taken from qqnorm above, for comparison
shapiro.test(norm.data) # p-value = 1
################
# Get a mean, removing NA's ("Not Available", indicating empty cells) from a dataset
# na.rm is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
mean(energy$obese, na.rm = TRUE)
################
# Geometric mean
# Make a set of numbers
nums <- c(2, 3, 7, 8)
nums
# Get the geometric mean
# which is to get the product of all the numbers in the set
# and take the nth root (where n is the size of the set)
# prod() returns the product directly; no need to build the whole
# cumulative-product vector and pick its last element.
prod(nums)^(1 / length(nums))
# Answer is 4.28139