Today’s Agenda
- Measures of Centrality
- Skewness
- Graphs
# loading palmer penguin data
library(palmerpenguins)
Measures of Centrality
- mean and median are super easy to calculate in R
# Centre of the penguin body-mass distribution (grams).
# na.rm = TRUE drops missing values so they do not turn the result into NA.
mean(penguins[["body_mass_g"]], na.rm = TRUE)
## [1] 4201.754
median(penguins[["body_mass_g"]], na.rm = TRUE)
## [1] 4050
# The mean is sensitive to outliers, whereas the median is much less so.
# Here the mean sits above the median, which hints at a right-skewed
# distribution.
Graphs
# Histogram of penguin weights with the default binning.
# The default title and x-axis label are taken from the expression passed in.
hist(penguins$body_mass_g)

# More bins! A single number for `breaks` is only a suggestion: hist() picks
# "pretty" breakpoints close to that count.
hist(penguins$body_mass_g, breaks = 100)
# With more, narrower bins:
#   + we can better see where there are dips in the data
#   + we can see outliers better
#   + it is easier to identify groups/species
#   - it is harder to see how big the bins are

# Even more bins, for comparison.
hist(penguins$body_mass_g, breaks = 1000)
# You, as the statistician, have to play this balancing game of deciding how
# many bins are best for your data.
# Other aesthetic improvements: a descriptive title, a readable x-axis label
# and a fill colour for the bars.
hist(
  penguins$body_mass_g,
  breaks = 50,
  col = "blue",
  xlab = "Body Mass (g)",
  main = "Histogram of Penguins Sizes"
)
# Side-by-side boxplots of body mass (g), one box per species.
# The formula reads "body mass explained by species"; `data = penguins` tells
# boxplot() where to look up both columns, so `penguins$` is not repeated.
boxplot(
  body_mass_g ~ species,
  data = penguins,
  xlab = "Species",
  ylab = "Body Mass (g)"
)