- R is a programming language
- R is open source (free)
- Many (but not all) R tools are open source
- R encourages good data habits
9/4/2018
# Scalars: a single number is a length-one numeric vector, and `+` adds them.
x <- 1
y <- 2
x + y
## [1] 3
# Vectors: `+` is vectorised, so equal-length vectors are added element by
# element.
x <- c(1, 2)
y <- c(3, 4)
x + y
## [1] 4 6
# Factors: categorical data stored as integer codes plus a set of levels.
# Passing `levels` explicitly fixes the level order (red, blue, green) instead
# of the default sorted order, so 'green' is coded 3, 'blue' 2 and 'red' 1.
x <- factor(
  c('green', 'blue', 'red'),
  levels = c('red', 'blue', 'green')
)
str(x)
## Factor w/ 3 levels "red","blue","green": 3 2 1
# Matrices: `cbind()` binds vectors together as columns; `+` on two matrices
# of the same dimensions adds them element by element.
x <- cbind(c(1, 2), c(3, 4))
y <- cbind(c(5, 6), c(7, 8))
x + y
##      [,1] [,2]
## [1,]    6   10
## [2,]    8   12
# Lists: containers whose elements may have different types (here a numeric
# and a character vector) and may themselves be lists. `str()` prints the
# nested structure compactly.
x <- list('Numbers' = c(1, 2), 'Letters' = c('a', 'b'))
y <- list('Numbers' = c(3, 4), 'Letters' = c('c', 'd'))
str(list(x, y))
## List of 2
##  $ :List of 2
##   ..$ Numbers: num [1:2] 1 2
##   ..$ Letters: chr [1:2] "a" "b"
##  $ :List of 2
##   ..$ Numbers: num [1:2] 3 4
##   ..$ Letters: chr [1:2] "c" "d"
# Data frames: a table whose columns are equal-length vectors of possibly
# different types.
# `x` holds 100 uniform random draws between 0 and 1000; `y` holds 100 colour
# labels drawn with replacement and stored as a factor. No seed is set, so the
# values differ from run to run.
x <- runif(100, 0, 1000)
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
y <- factor(sample(c('red', 'blue', 'green'), 100, replace = TRUE))
sample_table <- data.frame('category' = y, 'value' = x)

str(sample_table)
## 'data.frame':    100 obs. of  2 variables:
##  $ category: Factor w/ 3 levels "blue","green",..: 3 3 2 1 3 1 1 2 1 1 ...
##  $ value   : num  356.2 341.5 377.5 795.9 98.2 ...
# Column-wise summary of the table: counts per level for the factor column,
# and min / quartiles / mean / max for the numeric column.
summary(object = sample_table)
##   category      value
##  blue :37   Min.   :  5.6
##  green:30   1st Qu.:343.2
##  red  :33   Median :537.6
##             Mean   :535.2
##             3rd Qu.:758.7
##             Max.   :995.0
More information @ tidyverse.org
name | treatmenta | treatmentb |
---|---|---|
John Smith | NA | 18 |
Jane Doe | 4 | 1 |
Mary Johnson | 6 | 7 |

The same data with one row per name and treatment:

name | treatment | n |
---|---|---|
John Smith | a | NA |
John Smith | b | 18 |
Jane Doe | a | 4 |
Jane Doe | b | 1 |
Mary Johnson | a | 6 |
Mary Johnson | b | 7 |
# Compact overview of the built-in `iris` data set: dimensions, column types
# and the first few values of each column.
str(object = iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Show the first three rows of `iris`.
head(x = iris, n = 3)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
library(dplyr)

# Petal width statistics per species: group the rows by `Species`, then
# collapse each group to one row holding its minimum, maximum, mean and
# standard deviation.
iris %>%
  group_by(Species) %>%
  summarise(
    MIN = min(Petal.Width),
    MAX = max(Petal.Width),
    AVG = mean(Petal.Width),
    STD = sd(Petal.Width)
  )
## # A tibble: 3 x 5
##   Species      MIN   MAX   AVG   STD
##   <fct>      <dbl> <dbl> <dbl> <dbl>
## 1 setosa       0.1   0.6 0.246 0.105
## 2 versicolor   1     1.8 1.33  0.198
## 3 virginica    1.4   2.5 2.03  0.275
# Compact overview of the built-in `mtcars` data set: dimensions, column
# types and the first few values of each column.
str(object = mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
library(ggplot2)

# Scatter plot of fuel economy (`mpg`) against weight (`wt`) from `mtcars`.
# `cyl` is numeric in the data, so it is wrapped in `factor()` to get one
# discrete colour/fill per cylinder count; because the grouping is set in the
# top-level `aes()`, `geom_smooth()` fits a separate linear model per group.
ggplot(mtcars, aes(wt, mpg, color = factor(cyl), fill = factor(cyl))) +
  geom_point() +
  geom_smooth(method = 'lm') +
  theme_minimal()