Core Data Types in R

Vectors

> b = c(3,4,5, 8, 10)
> v = c(-3:4)
> v <- -3:4
> v = c(1:4, 8:10)
> b[2]
> v2  = seq(from=0, to=1, by=0.25)
> 2*1:4
[1] 2 4 6 8
> 10:1
[1] 10 9 8 7 6 5 4 3 2 1
> seq(from=1, by=.5, length=4)
[1] 1.0 1.5 2.0 2.5
> seq(to=1, by=.5, length=4)
[1] -0.5 0.0 0.5 1.0
> seq(from=4, to=-4, length=5)
[1] 4 2 0 -2 -4
> v3 = v1 + v2
> s = sum(v1)
> x <- c(1,3, 2, -1, 4,-6)
> cumsum(x)
[1] 1 4 6 5 9 3
> x <- c(1,3, 2, -1, 4,-6)
> prod(x)
[1] 144
> sort(c(3,2,1))
> v = c(1:10)
> v[1:4]
> x <- 1:4
> names(x) <- c("a", "b", "c", "d")
> x
a b c d
1 2 3 4
> x["a"]
a
1
> x["e"]
<NA>
NA
> e <- numeric()
> e
numeric(0)
> e <- character()
> e
character(0)
> e <- complex()
> e
complex(0)
> e <- logical()
> e
logical(0)
> e <- numeric()
> e[3]
[1] NA
> e[3] <- 10
> e[3]
[1] 10
> e
[1] NA NA 10
> x <- 1:10
> x
[1] 1 2 3 4 5 6 7 8 9 10
> x <- x[2:4]
> x
[1] 2 3 4
> rev(1:3)
[1] 3 2 1
> head(1:8, n=4)
[1] 1 2 3 4
> tail(1:8, n=4)
[1] 5 6 7 8
> x <- c(1,2,3)
> y <- c(4,5,6)
> z <- c(rbind(x,y))
>
> z
[1] 1 4 2 5 3 6
> c(1, 0, 1) %*% c(-1, 0, -1)
[,1]
[1,] -2
> v1 <- 1:3
> v2 <- 2:4
> v1 %o% v2
[,1] [,2] [,3]
[1,] 2 3 4
[2,] 4 6 8
[3,] 6 9 12
> outer(v1,v2)
[,1] [,2] [,3]
[1,] 2 3 4
[2,] 4 6 8
[3,] 6 9 12
> outer(v1,v2, '+')
[,1] [,2] [,3]
[1,] 3 4 5
[2,] 4 5 6
[3,] 5 6 7
> outer(v1,v2, '-')
[,1] [,2] [,3]
[1,] -1 -2 -3
[2,] 0 -1 -2
[3,] 1 0 -1
> x <- seq(0, 1, by=0.5)
> x
[1] 0.0 0.5 1.0
> y <- seq(0, 1, by=0.2)
> f <- function(x, y) x*y /(x+y+1)
> outer(x,y, f)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0.00000000 0.0000000 0.0000000 0.0000000 0.0000000
[2,] 0 0.05882353 0.1052632 0.1428571 0.1739130 0.2000000
[3,] 0 0.09090909 0.1666667 0.2307692 0.2857143 0.3333333
> outer(2:11, 1:10)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 2 4 6 8 10 12 14 16 18 20
[2,] 3 6 9 12 15 18 21 24 27 30
[3,] 4 8 12 16 20 24 28 32 36 40
[4,] 5 10 15 20 25 30 35 40 45 50
[5,] 6 12 18 24 30 36 42 48 54 60
[6,] 7 14 21 28 35 42 49 56 63 70
[7,] 8 16 24 32 40 48 56 64 72 80
[8,] 9 18 27 36 45 54 63 72 81 90
[9,] 10 20 30 40 50 60 70 80 90 100
[10,] 11 22 33 44 55 66 77 88 99 110
> v <- 1:3
> v
[1] 1 2 3
> t(v)
[,1] [,2] [,3]
[1,] 1 2 3
> t(t(v))
[,1]
[1,] 1
[2,] 2
[3,] 3
> dim(v) <- c(3,1)
> v
[,1]
[1,] 1
[2,] 2
[3,] 3
> dim(v) <- c(1,3)
> v
[,1] [,2] [,3]
[1,] 1 2 3
> rbind(v)
> cbind(v)
> v <- 1:4
> rep(v, 4)
[1] 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4
> rep(v, 4, length.out=10)
[1] 1 2 3 4 1 2 3 4 1 2
> rep(v, times=3, each=2)
[1] 1 1 2 2 3 3 4 4 1 1 2 2 3 3 4 4 1 1 2 2 3 3 4 4
> rep(v, c(1,2,3,4))
[1] 1 2 2 3 3 3 4 4 4 4
> 1 %in% 1:4
[1] TRUE
> 1 %in% 2:4
[1] FALSE
> 3 %in% 1:4
[1] TRUE
> 3 %in% c(1:2, 4:8)
[1] FALSE
> 'a' %in% c('a', 'b', 'c')
[1] TRUE
> 'aa' %in% c('a', 'aab', 'c')
[1] FALSE
> 'aa' %in% c('a', 'aa', 'c')
[1] TRUE

Index Vectors

> x <- c(1, 4, NA, 5, NaN)
> x
[1] 1 4 NA 5 NaN
> is.na(x)
[1] FALSE FALSE TRUE FALSE TRUE
> y <- x[!is.na(x)]
> y
[1] 1 4 5
> x <- sample(1:10, 10)
> x
[1] 4 1 3 7 9 10 5 2 8 6
> x[c(1,4,7,10)]
[1] 4 7 5 6
> x[seq(1,10, 2)]
[1] 4 3 9 5 8
> x[c(1:4, 1:4)]
[1] 4 1 3 7 4 1 3 7
> paste(c("x","y")[rep(c(1,2,2,1), times=4)], collapse='')
[1] "xyyxxyyxxyyxxyyx"
> x
[1] 8 4 3 7 10 5 9 6 2 1
> x[-c(1,4,8:10)]
[1] 4 3 10 5 9
> x <- 1:4
> names(x) <- c("a", "b", "c", "d")
> x[c("c", "b")]
c b
3 2

Matrices

> m = matrix(c(1:12), nrow=3)
> m
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
> m = matrix(c(1:12), ncol=3)
> m
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 12
> m <- matrix(1:6, nrow=2)
> nrow(m)
[1] 2
> ncol(m)
[1] 3
> m <- matrix(1:12, ncol=3)
> dim(m)
[1] 4 3
> m[1,1]
[1] 1
> m[1,]
[1] 1 5 9
> m[,1]
[1] 1 2 3 4
> m[1:2,]
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
> m[1:2, 2:3]
[,1] [,2]
[1,] 5 9
[2,] 6 10
> sum(m)
[1] 78
> rowSums(m)
[1] 15 18 21 24
> colSums(m)
[1] 10 26 42
> mean(m)
[1] 6.5
> rowMeans(m)
[1] 5 6 7 8
> colMeans(m)
[1] 2.5 6.5 10.5
> A <- matrix(c(3, 2, -1, 2, -2, .5, -1, 4, -1), nrow=3)
> colMeans(A)
[1] 1.3333333 0.1666667 0.6666667
> B <- scale(A, scale=F)
> round(colMeans(B), digits=2)
[1] 0 0 0
> round(B, digits=2)
[,1] [,2] [,3]
[1,] 1.67 1.83 -1.67
[2,] 0.67 -2.17 3.33
[3,] -2.33 0.33 -1.67
attr(,"scaled:center")
[1] 1.3333333 0.1666667 0.6666667
> cbind(1:4, 2:5, 3:6, 4:7)
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 2 3 4 5
[3,] 3 4 5 6
[4,] 4 5 6 7
> rbind(1:4, 2:5, 3:6, 4:7)
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 2 3 4 5
[3,] 3 4 5 6
[4,] 4 5 6 7
> m <- cbind(1:4, 2:5)
> m <- cbind(m, 3:6)
> m <- rbind(m, 9:11)
> m
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 2 3 4
[3,] 3 4 5
[4,] 4 5 6
[5,] 9 10 11
> matrix(0, 2,3)
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
> matrix(1, 2,3)
[,1] [,2] [,3]
[1,] 1 1 1
[2,] 1 1 1
> diag(3)
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 0 1 0
[3,] 0 0 1
> diag(1:3)
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 0 2 0
[3,] 0 0 3
> diag(c(3, 10, 11))
[,1] [,2] [,3]
[1,] 3 0 0
[2,] 0 10 0
[3,] 0 0 11
> diag(c(3, 10, 11), ncol=5)
[,1] [,2] [,3] [,4] [,5]
[1,] 3 0 0 0 0
[2,] 0 10 0 0 0
[3,] 0 0 11 0 0
> diag(c(3, 10, 11), nrow=5)
[,1] [,2] [,3] [,4] [,5]
[1,] 3 0 0 0 0
[2,] 0 10 0 0 0
[3,] 0 0 11 0 0
[4,] 0 0 0 3 0
[5,] 0 0 0 0 10
> m <- matrix(1:6, nrow=2)
> m
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> diag(m)
[1] 1 4
> matrix(1:6, nrow=2)
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> t(matrix(1:6, nrow=2))
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
> A <- matrix(c(1:12), nrow=3)
> A
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
> A + A
[,1] [,2] [,3] [,4]
[1,] 2 8 14 20
[2,] 4 10 16 22
[3,] 6 12 18 24
> A - A
[,1] [,2] [,3] [,4]
[1,] 0 0 0 0
[2,] 0 0 0 0
[3,] 0 0 0 0
> A * A
[,1] [,2] [,3] [,4]
[1,] 1 16 49 100
[2,] 4 25 64 121
[3,] 9 36 81 144
> A / A
[,1] [,2] [,3] [,4]
[1,] 1 1 1 1
[2,] 1 1 1 1
[3,] 1 1 1 1
> A^3
[,1] [,2] [,3] [,4]
[1,] 1 64 343 1000
[2,] 8 125 512 1331
[3,] 27 216 729 1728
> A^(0.5)
[,1] [,2] [,3] [,4]
[1,] 1.000000 2.000000 2.645751 3.162278
[2,] 1.414214 2.236068 2.828427 3.316625
[3,] 1.732051 2.449490 3.000000 3.464102
> v <- c(2,1,4)
> A + v
[,1] [,2] [,3] [,4]
[1,] 3 6 9 12
[2,] 3 6 9 12
[3,] 7 10 13 16
> A - v
[,1] [,2] [,3] [,4]
[1,] -1 2 5 8
[2,] 1 4 7 10
[3,] -1 2 5 8
> A * v
[,1] [,2] [,3] [,4]
[1,] 2 8 14 20
[2,] 2 5 8 11
[3,] 12 24 36 48
> A / v
[,1] [,2] [,3] [,4]
[1,] 0.50 2.0 3.50 5
[2,] 2.00 5.0 8.00 11
[3,] 0.75 1.5 2.25 3
> v <- c(2, 3, 1, 4)
> t(t(A) + v)
[,1] [,2] [,3] [,4]
[1,] 3 7 8 14
[2,] 4 8 9 15
[3,] 5 9 10 16
> t(t(A) - v)
[,1] [,2] [,3] [,4]
[1,] -1 1 6 6
[2,] 0 2 7 7
[3,] 1 3 8 8
> t(t(A) * v)
[,1] [,2] [,3] [,4]
[1,] 2 12 7 40
[2,] 4 15 8 44
[3,] 6 18 9 48
> t(t(A) / v)
[,1] [,2] [,3] [,4]
[1,] 0.5 1.333333 7 2.50
[2,] 1.0 1.666667 8 2.75
[3,] 1.5 2.000000 9 3.00
> A + rep(v, each=3)
[,1] [,2] [,3] [,4]
[1,] 3 7 8 14
[2,] 4 8 9 15
[3,] 5 9 10 16
> A - rep(v, each=3)
[,1] [,2] [,3] [,4]
[1,] -1 1 6 6
[2,] 0 2 7 7
[3,] 1 3 8 8
> A * rep(v, each=3)
[,1] [,2] [,3] [,4]
[1,] 2 12 7 40
[2,] 4 15 8 44
[3,] 6 18 9 48
> A / rep(v, each=3)
[,1] [,2] [,3] [,4]
[1,] 0.5 1.333333 7 2.50
[2,] 1.0 1.666667 8 2.75
[3,] 1.5 2.000000 9 3.00
> m <- matrix(1:4, nrow=2)
> m %*% m
[,1] [,2]
[1,] 7 15
[2,] 10 22
> v = c(1:2)
> v %*% m %*% v
[,1]
[1,] 27
> A <- matrix(c(1,1,1,3,0,2), nrow=3)
> B <- matrix(c(0,7,2,0,5,1), nrow=3)
> A
[,1] [,2]
[1,] 1 3
[2,] 1 0
[3,] 1 2
> B
[,1] [,2]
[1,] 0 0
[2,] 7 5
[3,] 2 1
> t(A) %*% B
[,1] [,2]
[1,] 9 6
[2,] 4 2
> crossprod(A, B)
[,1] [,2]
[1,] 9 6
[2,] 4 2
> A %*% t(B)
[,1] [,2] [,3]
[1,] 0 22 5
[2,] 0 7 2
[3,] 0 17 4
> tcrossprod(A, B)
[,1] [,2] [,3]
[1,] 0 22 5
[2,] 0 7 2
[3,] 0 17 4
> A <- matrix(c(1,1,1,3,0,2), nrow=3)
> t(A) %*% A
[,1] [,2]
[1,] 3 5
[2,] 5 13
> crossprod(A)
[,1] [,2]
[1,] 3 5
[2,] 5 13
> A <- matrix(c(1,1,1,3,0,2), nrow=3)
> A %*% t(A)
[,1] [,2] [,3]
[1,] 10 1 7
[2,] 1 1 1
[3,] 7 1 5
> m1 <- matrix(1:4, nrow=2)
> m2 <- matrix(c(1,3,5,7), nrow=2)
> outer(m1, m2)
, , 1, 1
[,1] [,2]
[1,] 1 3
[2,] 2 4
, , 2, 1
[,1] [,2]
[1,] 3 9
[2,] 6 12
, , 1, 2
[,1] [,2]
[1,] 5 15
[2,] 10 20
, , 2, 2
[,1] [,2]
[1,] 7 21
[2,] 14 28
> m <- matrix(c(1:4), nrow=2)
> colnames(m) <- c("x", "y")
> rownames(m) <- c("a", "b")
> m
x y
a 1 3
b 2 4

Arrays

> a <- array(1:10, dim=c(4,4,4))
> a
, , 1
[,1] [,2] [,3] [,4]
[1,] 1 5 9 3
[2,] 2 6 10 4
[3,] 3 7 1 5
[4,] 4 8 2 6
, , 2
[,1] [,2] [,3] [,4]
[1,] 7 1 5 9
[2,] 8 2 6 10
[3,] 9 3 7 1
[4,] 10 4 8 2
, , 3
[,1] [,2] [,3] [,4]
[1,] 3 7 1 5
[2,] 4 8 2 6
[3,] 5 9 3 7
[4,] 6 10 4 8
, , 4
[,1] [,2] [,3] [,4]
[1,] 9 3 7 1
[2,] 10 4 8 2
[3,] 1 5 9 3
[4,] 2 6 10 4
> dim(a)
[1] 4 4 4
> a[1,1,1]
[1] 1
> a[1,2, 1:4]
[1] 5 1 7 3
>
> x <- 1:18
> dim(x) <- c(2,3,3)
> x
, , 1
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
, , 2
[,1] [,2] [,3]
[1,] 7 9 11
[2,] 8 10 12
, , 3
[,1] [,2] [,3]
[1,] 13 15 17
[2,] 14 16 18
> a <- array(1:4, dim=c(2,3,3))
> a
, , 1
[,1] [,2] [,3]
[1,] 1 3 1
[2,] 2 4 2
, , 2
[,1] [,2] [,3]
[1,] 3 1 3
[2,] 4 2 4
, , 3
[,1] [,2] [,3]
[1,] 1 3 1
[2,] 2 4 2
> a <- array(1:4, dim=c(2,3,4))
> b <- aperm(a, perm=c(3,2, 1))
> dim(b)
[1] 4 3 2

Index Matrices

> data <- array(1:20, dim=c(5,4))
> data
[,1] [,2] [,3] [,4]
[1,] 1 6 11 16
[2,] 2 7 12 17
[3,] 3 8 13 18
[4,] 4 9 14 19
[5,] 5 10 15 20
> indices <- cbind(c(1,2,3), c(1,3,2))
> indices
[,1] [,2]
[1,] 1 1
[2,] 2 3
[3,] 3 2
> data[indices]
[1] 1 12 8
> data[indices] <- 0
> data
[,1] [,2] [,3] [,4]
[1,] 0 6 11 16
[2,] 2 7 0 17
[3,] 3 0 13 18
[4,] 4 9 14 19
[5,] 5 10 15 20
> indices <- cbind(c(1,2,3, NA, 2), c(2,3,4, 2, 0))
> data[indices]
[1] 6 0 18 NA
> m <- matrix(1:9, nrow=3)
> m
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> indices = cbind(1:3, rev(1:3))
> indices
[,1] [,2]
[1,] 1 3
[2,] 2 2
[3,] 3 1
> m[indices]
[1] 7 5 3
> m <- matrix(0, 3,3)
> m[indices] = 1
> m
[,1] [,2] [,3]
[1,] 0 0 1
[2,] 0 1 0
[3,] 1 0 0

The recycling rule

  • The expression is scanned from left to right.
  • Any short vector operands are extended by recycling their values until they match the size of any other operands.
  • As long as only short vectors and arrays are encountered, the arrays must all have the same dim attribute or an error results.
  • Any vector operand longer than a matrix or array operand generates an error.
  • If array structures are present and no error or coercion to vector has been precipitated, the result is an array structure with the common dim attribute of its array operands.

Lists

> l = list(a=c(1,2,3), b=c(1:10), c=3)
> l
$a
[1] 1 2 3
$b
[1] 1 2 3 4 5 6 7 8 9 10
$c
[1] 3
> l$a
[1] 1 2 3
> l$b
[1] 1 2 3 4 5 6 7 8 9 10
> l$c
[1] 3
> names(l)
[1] "a" "b" "c"
> l[[1]]
[1] 1 2 3
> l[[2]]
[1] 1 2 3 4 5 6 7 8 9 10
> l[[3]]
[1] 3
> l$a
[1] 1 2 3
> l$c
[1] 3
> l$c + 2
[1] 5
> l$b + 3
[1] 4 5 6 7 8 9 10 11 12 13
> l$a * l$a
[1] 1 4 9
> l[['a']]
[1] 1 2 3
> l[['b']]
[1] 1 2 3 4 5 6 7 8 9 10
> l[['c']]
[1] 3
> l[1]
$a
[1] 1 2 3
> l[c(1,2)]
$a
[1] 1 2 3
$b
[1] 1 2 3 4 5 6 7 8 9 10
> for (name in names(l)){print(l[[name]])}
[1] 1 2 3
[1] 1 2 3 4 5 6 7 8 9 10
[1] 3
> l$d <- 4
> l$e <- 5
> for (name in names(l)){print(c(name,":", l[[name]]), quote=FALSE)}
[1] a : 1 2 3
[1] b : 1 2 3 4 5 6 7 8 9 10
[1] c : 3
[1] d : 4
[1] e : 5
> l[length(l)] <- NULL
> length(l)
[1] 4
> for (name in names(l)){print(c(name,":", l[[name]]), quote=FALSE)}
[1] a : 1 2 3
[1] b : 1 2 3 4 5 6 7 8 9 10
[1] c : 3
[1] d : 4
> l[['c']] <- NULL
> names(l)
[1] "a" "b" "d"
> for (name in names(l)){print(c(name,":", l[[name]]), quote=FALSE)}
[1] a : 1 2 3
[1] b : 1 2 3 4 5 6 7 8 9 10
[1] d : 4
> length(l)
[1] 3
> l2 <- list(1,2,"hello")
> l2
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[1] "hello"
> names(l2) <- c("x", "y", "z")
> l2
$x
[1] 1
$y
[1] 2
$z
[1] "hello"
> c(l, l2)
$a
[1] 1 2 3
$b
[1] 1 2 3 4 5 6 7 8 9 10
$d
[1] 4
$x
[1] 1
$y
[1] 2
$z
[1] "hello"
> l <- list (a=1, b=2, c=4)
> unlist(l)
a b c
1 2 4
> names(unlist(l))
[1] "a" "b" "c"
> list(a=2+3, b=4*3)
$a
[1] 5
$b
[1] 12
> alist(a=2+3, b=4*3)
$a
2 + 3
$b
4 * 3
> l <- alist(a=2+3, b=4*3)
> l$a
2 + 3
> eval(l$a)
[1] 5
> eval(l$b)
[1] 12

Factors

>  v <- c(1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4)
> vf <- factor(v)
> levels(vf)
[1] "1" "2" "3" "4"
> vf
[1] 1 1 2 2 2 3 3 3 3 4 4
Levels: 1 2 3 4
> vf <- factor(v, levels=c(1,2,3,4), ordered=TRUE)
> vf
[1] 1 1 2 2 2 3 3 3 3 4 4
Levels: 1 < 2 < 3 < 4
> mean(as.numeric(levels(vf)[vf]))
[1] 2.545455
> colors <- sample(c("red", "green", "blue"), 10, replace = TRUE)
> colors <- factor(colors)
> colors
[1] blue green green blue green blue red red blue red
Levels: blue green red
> levels(colors)
[1] "blue" "green" "red"
> colors <- c('r', 'r', 'g', 'b', 'r', 'g', 'g', 'b', 'b', 'r')
> length(colors)
[1] 10
> lengths <-c(1, 1, 2, 2, 1, 1, 1, 2, 2, 3)
> length(lengths)
[1] 10
> colorsf <- factor(colors)
> mean(lengths)
[1] 1.6
> tapply(lengths, colorsf, mean)
b g r
2.000000 1.333333 1.500000
> gl(2,8)
[1] 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
Levels: 1 2
> as.integer(gl(2,8))
[1] 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
> gl(2,8, labels=c("x", "y"))
[1] x x x x x x x x y y y y y y y y
Levels: x y
> as.integer(gl(2,8, labels=c("x", "y")))
[1] 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
> gl(2,8, labels=c("c", "b"), ordered=TRUE)
[1] c c c c c c c c b b b b b b b b
Levels: c < b
> gl(2,1,10)
[1] 1 2 1 2 1 2 1 2 1 2
Levels: 1 2
> gl(2,2,10)
[1] 1 1 2 2 1 1 2 2 1 1
Levels: 1 2
> gl(2,2,12)
[1] 1 1 2 2 1 1 2 2 1 1 2 2
Levels: 1 2
> gl(2,3,12)
[1] 1 1 1 2 2 2 1 1 1 2 2 2
Levels: 1 2

Data Frames

> df <- data.frame(x=c(11,12,13), y=c(21,22,23), z=c(7,20, 10))
> df
x y z
1 11 21 7
2 12 22 20
3 13 23 10
> df[1,]
x y z
1 11 21 7
> colnames(df)
[1] "x" "y" "z"
> rownames(df)
[1] "1" "2" "3"
> df$x
[1] 11 12 13
> df$y
[1] 21 22 23
> df[,1]
[1] 11 12 13
> hw = data.frame(hello=c(1,2,3), world=c(4,5,6))
> hw
hello world
1 1 4
2 2 5
3 3 6
> colSums(df)
x y z
36 66 37
> rowSums(df)
[1] 39 54 46
> l <- list(x=c(1,2,3), y=c(3,2,1))
> df <- as.data.frame((l))
> df
x y
1 1 3
2 2 2
3 3 1
> df <- data.frame(x=1:40, y=(1:40)^2, z=(1:40)^3)
> head(df)
x y z
1 1 1 1
2 2 4 8
3 3 9 27
4 4 16 64
5 5 25 125
6 6 36 216
> head(df, n=4)
x y z
1 1 1 1
2 2 4 8
3 3 9 27
4 4 16 64
> tail(df)
x y z
35 35 1225 42875
36 36 1296 46656
37 37 1369 50653
38 38 1444 54872
39 39 1521 59319
40 40 1600 64000
> tail(df, n=3)
x y z
38 38 1444 54872
39 39 1521 59319
40 40 1600 64000
> dplyr::sample_n(df, 4)
x y z
13 13 169 2197
12 12 144 1728
30 30 900 27000
27 27 729 19683
> df <- data.frame(x=1:4, y=(1:4)^2, z=(1:4)^3)
> dplyr::sample_n(df, 6, replace=T)
x y z
2 2 4 8
2.1 2 4 8
4 4 16 64
2.2 2 4 8
2.3 2 4 8
3 3 9 27
> df <- data.frame(x=c(1,2,3), y=c(3,2,1))
> x
Error: object 'x' not found
> attach(df)
> x
[1] 1 2 3
> y
[1] 3 2 1
> detach(df)
> x
Error: object 'x' not found
> attach(df)
> df$x <- x +  y
> df
x y
1 4 3
2 4 2
3 4 1
> x
[1] 1 2 3
> y
[1] 3 2 1
> detach()
> attach(df)
> x
[1] 4 4 4
> y
[1] 3 2 1
> df <- data.frame(x=c(1,2,3), y=c('a', 'b', 'c'))
> df
x y
1 1 a
2 2 b
3 3 c
> rbind(df, c(4, 'c'))
x y
1 1 a
2 2 b
3 3 c
4 4 c
> cbind(df, z=c(T, F, F))
x y z
1 1 a TRUE
2 2 b FALSE
3 3 c FALSE
> rbind(df, c(4, 'd'))
x y
1 1 a
2 2 b
3 3 c
4 4 <NA>
Warning message:
In `[<-.factor`(`*tmp*`, ri, value = "d") :
invalid factor level, NA generated
> View(df)
> df <- data.frame(x=c(1,2,3), y=c('a', 'b', 'c'))
> str(df)
'data.frame': 3 obs. of 2 variables:
$ x: num 1 2 3
$ y: Factor w/ 3 levels "a","b","c": 1 2 3
> df <- data.frame(x=1:40, y=(1:40)^2, z=(1:40)^3)
> str(df)
'data.frame': 40 obs. of 3 variables:
$ x: int 1 2 3 4 5 6 7 8 9 10 ...
$ y: num 1 4 9 16 25 36 49 64 81 100 ...
$ z: num 1 8 27 64 125 216 343 512 729 1000 ...
> df <- data.frame(x=1:10, y=11:20, z=21:30, a=31:40, b=41:50)
> df$x[5] <- NA
> df$a[7] <- NA
> df
x y z a b
1 1 11 21 31 41
2 2 12 22 32 42
3 3 13 23 33 43
4 4 14 24 34 44
5 NA 15 25 35 45
6 6 16 26 36 46
7 7 17 27 NA 47
8 8 18 28 38 48
9 9 19 29 39 49
10 10 20 30 40 50
> colSums(is.na(df))
x y z a b
1 0 0 1 0
> colSums(is.na(df)) > 0
x y z a b
TRUE FALSE FALSE TRUE FALSE
> colnames(df)[colSums(is.na(df)) > 0]
[1] "x" "a"

Time Series

> observations <- sample(1:10, 24, replace=T)
> observations
[1] 2 7 2 6 2 5 5 8 8 6 4 9 8 6 3 2 5 1 2 5 4 8 5 10
> time_series <- ts(observations, start=c(2016,1), frequency=12)
> time_series
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2016 2 7 2 6 2 5 5 8 8 6 4 9
2017 8 6 3 2 5 1 2 5 4 8 5 10
> class(time_series)
[1] "ts"
> mode(time_series)
[1] "numeric"
> typeof(time_series)
[1] "integer"
> window(time_series, start=c(2016, 7), end=c(2016, 12))
Jul Aug Sep Oct Nov Dec
2016 5 8 8 6 4 9

--

--

--

Python | JavaScript | Web Applications | Math | Statistics | Computer Vision | Sparse Representations https://www.linkedin.com/in/shaileshkumar1729/

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

Parallel K-Means Clustering with reducer function

Vert.x 4: How to build a reactive RESTful Web Service

How I got direct Internship at Microsoft | Microsoft Engage

How we cloned a news website, The New York Times

Getting Started With Serverless Framework

Why I want to become a software engineer…

Head First Java Chapter 02 summary

#100DaysofSQL | DAY 12:

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Shailesh Kumar

Shailesh Kumar

Python | JavaScript | Web Applications | Math | Statistics | Computer Vision | Sparse Representations https://www.linkedin.com/in/shaileshkumar1729/

More from Medium

Data Analysis with R — Part 3 (Control Flow)

How to Filter Data in R using dplyr

Simple Linear Regression in R

Practical advantages of R language in Web Analytics and Data Science