\[X \sim N(\mu,\sigma)\]
where \(\mu\) and \(\sigma\) are the mean and standard deviation of \(X\), respectively. (The second parameter here is the standard deviation, not the variance \(\sigma^2\).)
# Proportion of racers whose score is below Leo's 4900, for scores that are
# N(mean = 4300, sd = 500). If the score is a finishing time (smaller is
# faster -- an assumption, confirm against the data), this is the share of
# racers faster than Leo; multiply by 100 for a percentage.
pnorm(4900, mean = 4300, sd = 500)
## [1] 0.8849303
# Equivalent: standardize first, z = (x - mean) / sd = 1.2, then use the
# standard normal curve (pnorm defaults to mean = 0, sd = 1).
pnorm((4900 - 4300) / 500)
## [1] 0.8849303
# Area between two values: standardize both, then subtract the area below the
# smaller value from the area below the larger value.
pnorm(1) - pnorm(-1)
## [1] 0.6826895
# That is the area within one standard deviation of the mean on the standard
# normal curve.
# Area to the right of a value: 1 - pnorm(value).
1 - pnorm(1)
## [1] 0.1586553
# A vector is an ordered collection of values of a single type, such as the
# body masses of the penguins.
library(palmerpenguins)
# Copy the packaged data set into the workspace under the same name.
penguins <- penguins
# Pull one column out of the data frame as a plain vector.
mass <- penguins[["body_mass_g"]]
# A data frame is a table, usually displayed with rows and columns.
# A vector can be wrapped in a data frame as a single column.
df_mass <- data.frame(weights = mass)
# Some R functions are meant for vectors and others for data frames; passing
# the wrong type can give surprising results.
# Goal: draw 5 penguin body masses at random from our data set.
sample(penguins[["body_mass_g"]], size = 5)
## [1] 4200 3450 3500 6000 4675
# What happens if we sample from the whole data frame instead of the vector?
sample(penguins, size = 5)
## # A tibble: 344 × 5
## island body_mass_g year species bill_length_mm
## <fct> <int> <int> <fct> <dbl>
## 1 Torgersen 3750 2007 Adelie 39.1
## 2 Torgersen 3800 2007 Adelie 39.5
## 3 Torgersen 3250 2007 Adelie 40.3
## 4 Torgersen NA 2007 Adelie NA
## 5 Torgersen 3450 2007 Adelie 36.7
## 6 Torgersen 3650 2007 Adelie 39.3
## 7 Torgersen 3625 2007 Adelie 38.9
## 8 Torgersen 4675 2007 Adelie 39.2
## 9 Torgersen 3475 2007 Adelie 34.1
## 10 Torgersen 4250 2007 Adelie 42
## # … with 334 more rows
# This does not give us 5 penguins: the result keeps all 344 rows and instead
# contains 5 of the columns, in shuffled order.
# To draw 5 random rows (penguins) from a data frame, use slice_sample() from
# dplyr; it is the current replacement for the superseded sample_n().
slice_sample(penguins, n = 5)
## # A tibble: 5 × 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <fct> <fct> <dbl> <dbl> <int> <int> <fct>
## 1 Chinst… Dream 46.9 16.6 192 2700 fema…
## 2 Chinst… Dream 43.5 18.1 202 3400 fema…
## 3 Chinst… Dream 42.4 17.3 181 3600 fema…
## 4 Gentoo Biscoe 47.6 14.5 215 5400 male
## 5 Adelie Dream 40.6 17.2 187 3475 male
## # … with 1 more variable: year <int>
# this function lives in the dplyr package, which is part of the tidyverse
# you might need to run library(tidyverse) (or library(dplyr)) to use it
# Make a uniform distribution
# A school has 5000 students; suppose every GPA between 0 and 4 is equally
# likely to occur.
# The head count is named once so the id column and the number of simulated
# GPAs cannot drift apart.
n_students <- 5000L
students <- data.frame(
  id = seq_len(n_students),                  # student ids 1, 2, ..., n_students
  GPA = runif(n_students, min = 0, max = 4)  # one uniform draw per student
)