Basic statistical functions applied to the data set from the notes: {x_i, i = 1, …, 10} = {8, 3, 14, 1, 5, 7, 21, 4, 10, 3}.
# Enter the ten observations as a numeric vector 'x'.
x <- c(8, 3, 14, 1, 5, 7, 21, 4, 10, 3)
# Wrap 'x' in a one-column data frame 'df'; the column keeps the name 'x'.
df <- data.frame(x = x)
# Show the first two rows of the data frame.
head(df, n = 2)
## x
## 1 8
## 2 3
# 'summary' reports basic descriptive statistics for each column.
# Its 1st and 3rd quartiles come from quantile(), i.e. they are the
# interpolated 25th and 75th percentiles (3.25 and 9.50 for this data).
summary(df)
## x
## Min. : 1.00
## 1st Qu.: 3.25
## Median : 6.00
## Mean : 7.60
## 3rd Qu.: 9.50
## Max. :21.00
# Sample mean of column 'x' in data frame 'df'.
xbar <- mean(df[["x"]])
# Sample standard deviation of the same column (sd() divides by n - 1).
s <- sd(df[["x"]])
Results printed in markdown: mean = 7.6, sample standard deviation = 6.0772801
# Change the default width and height used for all subsequent plots.
options(
  repr.plot.width = 5,
  repr.plot.height = 3.5
)
# Histogram of column 'x' of the data frame, with explicit title and x-axis label.
hist(df$x, col = "lightblue", main = "histogram", xlab = "x")
# Box plot with one box per data-frame column; the column names label the x-axis.
boxplot(df, xlab = names(df))
# Empirical cumulative distribution function (ECDF) of column 'x', saved as 'Fn'.
# The call is written as ecdf(df$x) on purpose: ecdf() records its call and
# plot() shows it as the default title.
Fn <- ecdf(df$x)
# Plot the ECDF: a step function that jumps at each observed value and is
# right-continuous (Fn(t) is the fraction of observations <= t).
plot(Fn, col = "purple")