Introduction

Matrix is just a table of fancy numbers and notations, here let’s see how we manipulate it with R and Python.

Matrix

In R, A matrix is the special case of a two-dimensional array.

Note:

ncol == number of column
nrow == number of row
byrow == by row sequences
dimnames == names of rownames & colnames

m <- matrix(1:6, nrow = 3, ncol = 2)
#== m <- matrix(1:6, 3)
dimnames(m) <- list(c("a", "b","c"), c("d", "e"));m
#   d e
# a 1 4
# b 2 5
# c 3 6
dim(m) <- c(2,3);m
# 1    3    5
# 2    4    6
dim(m) <- 6;m
# 1 2 3 4 5 6

X = matrix(1:20, nrow=5, byrow=T,
           dimnames=list(c("A","B","C","D","E"),c("boy","girl","man","woman")))
#   boy girl man woman
# A   1    2     3   4
# B   5    6     7   8
# C   9   10    11  12
# D  13   14    15  16
# E  17   18    19  20

X[,2]
X[,"girl"]
 # A  B  C  D  E 
 # 2  6 10 14 18 
X["B",]
X[2,]
# boy  girl   man woman 
#     5     6     7     8

X[[6]]
X[1,2]
# 2
X[2,4]
X[[(4-1)*nrow(X) + 2]]
# 12

X[1, 2]
#   girl
# A    2
X[1, 2,drop = T]
# 2

attributes(X)
# $dim
# [1] 5 4
# 
# $dimnames
# $dimnames[[1]]
# [1] "A" "B" "C" "D" "E"
# 
# $dimnames[[2]]
# [1] "boy"   "girl"  "man" "woman"  

dim(X)
attributes(X)$dim
# 5 4
nrow(X);ncol(X)
# 5
# 4

dimnames(X)[[1]];dimnames(X)[[2]]
attributes(X)$dimnames[[1]];attributes(X)$dimnames[[2]]
attr(X,"dimnames")[[1]];attr(X,"dimnames")[[2]]
# "A" "B" "C" "D" "E"
# "boy"   "girl"  "man"   "woman" 
rownames(X)
# "A" "B" "C" "D" "E"
colnames(X)
# "boy"   "girl"  "man"   "woman"
names(X)
# !!! NULL

length(X)
# 20
nchar(X)
#   boy girl man woman
# A   1    1   1     1
# B   1    1   1     1
# C   1    2   2     2
# D   2    2   2     2
# E   2    2   2     2

rowSums(X)
rowSums = apply(X, 1, sum)
# A  B  C  D  E 
# 10 26 42 58 74
rowMeans(X)
rowMeans = apply(X, 1, mean)
#   A    B    C    D    E 
# 2.5  6.5 10.5 14.5 18.5 
colSums(X)
colSums = apply(X, 2, sum)
# boy  girl   man woman 
# 45    50    55    60 
colMeans(X)
colMeans = apply(X, 2, mean)
# boy  girl   man woman 
#   9    10    11    12
colSds = apply(X, 2, sd)
#      boy     girl      man    woman 
# 6.324555 6.324555 6.324555 6.324555

str(X)
# int [1:5, 1:4] 1 5 9 13 17 2 6 10 14 18 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:5] "A" "B" "C" "D" ...
#   ..$ : chr [1:4] "boy" "girl" "man" "woman"

Array

matrix just is a special array

identical(array(1:9,dim=(c(3,3))), matrix(1:9,3))
# TRUE

dat = data.frame(boy = X[,1], girl=X[,2], man=X[,3], woman=X[,4])
rownames(dat) <- letters[1:nrow(dat)]
data.matrix(dat)
#   boy girl man woman
# a   1    2   3     4
# b   5    6   7     8
# c   9   10  11    12
# d  13   14  15    16
# e  17   18  19    20
array(X, c(2,5,2))
# , , 1
# 
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    9   17    6   14
# [2,]    5   13    2   10   18
# 
# , , 2
# 
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    3   11   19    8   16
# [2,]    7   15    4   12   20
as.vector(X)
# [1]  1  5  9 13 17  2  6 10 14 18  3  7 11 15 19  4  8 12 16 20
X[] <- 0
identical(X, array(0,c(1,5,4)))
# TRUE

Y <- 1:40
dim(Y) <- c(2,5,4)
Y[1, 2, 1]
# 3
Y[1, 1:2, 2:1]
#      [,1] [,2]
# [1,]    11    1
# [2,]    13    3
Y[1, , ]
#      [,1] [,2] [,3] [,4]
# [1,]    1   11   21   31
# [2,]    3   13   23   33
# [3,]    5   15   25   35
# [4,]    7   17   27   37
# [5,]    9   19   29   39
Y[ , 2, ]
#      [,1] [,2] [,3] [,4]
# [1,]    3   13   23   33
# [2,]    4   14   24   34
Y[ , , 2]
#      [,1] [,2] [,3] [,4] [,5]
# [1,]   11   13   15   17   19
# [2,]   12   14   16   18   20
Y[matrix(c(1,1,1,1,2,1,1,3,1,1,4,1,2,5,4), ncol=3, byrow=T)]
# 1  3  5  7 40

rbind & cbind

x <- 1:3
y <- 10:12
cbind(x,y)
#      x  y
# [1,] 1 10
# [2,] 2 11
# [3,] 3 12
rbind(x,y)
#  [,1] [,2] [,3]
# x    1    2    3
# y   10   11   12
rbind(x,z=0)
#   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# x    1    3    2    7    4    8    5    8   10     4
# z    0    0    0    0    0    0    0    0    0     0

list(row = rbind(rowSums(X),rowMeans(X)),col =rbind(colSums(X),colMeans(X)))
# $row
#             A    B    C    D    E
# rowSums  10.0 26.0 42.0 58.0 74.0
# rowMeans  2.5  6.5 10.5 14.5 18.5
# 
# $col
#          boy girl man woman
# colSums   45   50  55    60
# colMeans   9   10  11    12

Diagnal & Transposed matrix

m <- matrix(1:9, nrow = 3, ncol = 3)
#  1    4    7
#  2    5    8
#  3    6    9
diag(m)
# 1 5 9
diag(m)=c(0,0,1)
#  0    4    7
#  2    0    8
#  3    6    1
diag(c(1,2,3))
# 1    0    0
# 0    2    0
# 0    0    3
diag(4)
# 1    0    0    0
# 0    1    0    0
# 0    0    1    0
# 0    0    0    1
t(m)
#  0    2    3
#  4    0    6
#  7    8    1
m[lower.tri(m)]
# 2 3 6
m[upper.tri(m)]=0
# 0    0    0
# 2    0    0
# 3    6    1

Operation

x <- 1:6
y <- seq(2, by=2, length = 6)
x/y
# [1] 0.5 0.5 0.5 0.5 0.5 0.5
matrix(x)
#      [,1]
# [1,]    1
# [2,]    2
# [3,]    3
# [4,]    4
# [5,]    5
# [6,]    6
x + matrix(1:12, nrow = 3)
#      [,1] [,2] [,3] [,4]
# [1,]    2    8    8   14
# [2,]    4   10   10   16
# [3,]    6   12   12   18

x*y
# [1] 2  8 18 32 50 72
(z <- x %*% y)    # inner product
#      [,1]
# [1,]   182
drop(z)
# 182

x %*% t(y)  
x %o% y  #== outer(x,y)
#      [,1] [,2] [,3] [,4] [,5] [,6]
# [1,]    2    4    6    8   10   12
# [2,]    4    8   12   16   20   24
# [3,]    6   12   18   24   30   36
# [4,]    8   16   24   32   40   48
# [5,]   10   20   30   40   50   60
# [6,]   12   24   36   48   60   72

x = matrix(1:4,nrow=2)
y
x %*% x
#      [,1] [,2]
# [1,]    7   15
# [2,]   10   22
det(x)
# -2

crossprod(x, x)
#== t(x)%*%x
#      [,1] [,2]
# [1,]    5   11
# [2,]   11   25
tcrossprod(x)
#== x%*%t(x)
#      [,1] [,2]
# [1,]   10   14
# [2,]   14   20

pow.matrix <- function(mat, n = 1) {
  p = mat
  while (n > 1) {
    p <- mat %*% p
    n = n - 1
  }
  p
}
pow.matrix(x,5)
# 1069 2337
# 1558 3406

y = matrix(1:8, nrow=2)
kronecker(x,y)
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
# [1,]    1    3    5    7    3    9   15   21
# [2,]    2    4    6    8    6   12   18   24
# [3,]    2    6   10   14    4   12   20   28
# [4,]    4    8   12   16    8   16   24   32

Inverse

set.seed(0)
A = matrix(round(rnorm(9),3),nrow=3,ncol=3)
#        [,1]   [,2]   [,3]
# [1,]  1.263  1.272 -0.929
# [2,] -0.326  0.415 -0.295
# [3,]  1.330 -1.540 -0.006
solve(A)
#            [,1]      [,2]         [,3]
# [1,] 0.44257596 -1.393536 -0.009974648
# [2,] 0.38203629 -1.189780 -0.654421208
# [3,] 0.04835694 -3.523613 -0.909603614
round(A%*%solve(A))
#      [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    1    0
# [3,]    0    0    1

## NOTE: solve is not a stable solution
seqs <- seq(1,1000,length=50)
mat <- matrix(c(seqs,seqs^2,seqs^3,rep(1,50)),ncol = 4)
colnames(mat) <- c("X","X2","X3","Intercept")

det(crossprod(mat))
# 1.291e+36
solve(crossprod(mat))
## Error in solve.default(crossprod(mat)) : system is computationally singular: reciprocal condition number = 4.70082e-19
## we can use the QR factorization to solve this problem

Eigen

library(LoopAnalyst)
A = matrix(1:9,nrow=3)
make.adjoint(A)
#      [,1] [,2] [,3]
# [1,]   -3    6   -3
# [2,]    6  -12    6
# [3,]   -3    6   -3
eigen(A)
# eigen() decomposition
# $values
# [1]  1.4186175+0.0000000i -0.5568088+0.3363604i -0.5568088-0.3363604i
# 
# $vectors
#               [,1]                 [,2]                 [,3]
# [1,]  0.4934459+0i 0.2008221+0.1603767i 0.2008221-0.1603767i
# [2,]  0.6188857+0i 0.3058970-0.2288351i 0.3058970+0.2288351i
# [3,] -0.6111396+0i 0.8877000+0.0000000i 0.8877000+0.0000000i
# a %*% x = b(I)

Useful Functions

outer(month.abb, 2017:2018, FUN = "paste")
#       [,1]       [,2]      
#  [1,] "Jan 2017" "Jan 2018"
#  [2,] "Feb 2017" "Feb 2018"
#  [3,] "Mar 2017" "Mar 2018"
#  [4,] "Apr 2017" "Apr 2018"
#  [5,] "May 2017" "May 2018"
#  [6,] "Jun 2017" "Jun 2018"
#  [7,] "Jul 2017" "Jul 2018"
#  [8,] "Aug 2017" "Aug 2018"
#  [9,] "Sep 2017" "Sep 2018"
# [10,] "Oct 2017" "Oct 2018"
# [11,] "Nov 2017" "Nov 2018"
# [12,] "Dec 2017" "Dec 2018"

outer(1:5,5:1,"+")
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    6    5    4    3    2
# [2,]    7    6    5    4    3
# [3,]    8    7    6    5    4
# [4,]    9    8    7    6    5
# [5,]   10    9    8    7    6

hil <- function(n){
     i <- 1:n
     outer(i-1,i,"+")
 }
hil(5)
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    2    3    4    5
# [2,]    2    3    4    5    6
# [3,]    3    4    5    6    7
# [4,]    4    5    6    7    8
# [5,]    5    6    7    8    9

Advanced Matrix

Moore-Penrose

# AA+A=A  A%*%ginv(A)%*%A
# A+AA+=A+ ginv(A)%*%A%*%ginv(A)
# (AA+)T=AA+ A%*%ginv(A)  t(A%*%ginv(A)) symmetric
# (A+A)T=A+A ginv(A)%*%A  t(ginv(A)%*%A) symmetric

library(MASS)
A=matrix(1:12,nrow=3,ncol=4)
ginv(A)
#              [,1]        [,2]        [,3]
# [1,] -0.483333333 -0.03333333  0.41666667
# [2,] -0.244444444 -0.01111111  0.22222222
# [3,] -0.005555556  0.01111111  0.02777778
# [4,]  0.233333333  0.03333333 -0.16666667

QR Factorization

The QR factorization tells us that we can decompose any full rank \(N\times p\) matrix \(\mathbf{X}\) as:

\[\mathbf{X = QR}\]

with:

\(\mathbf{Q}\) a \(N \times p\) matrix with \(\mathbf{Q^\top Q=I}\)
\(\mathbf{R}\) a \(p \times p\) upper triangular matrix.

If we want to calculate the LSE, you can use the QR decomposition, its the more stable method.

WHY:

Supposed we need to solve the minimum RSS(Residual Sum of Squares):

\[ (\mathbf{Y}-\mathbf{X}\boldsymbol{\beta})^\top (\mathbf{Y}-\mathbf{X}\boldsymbol{\beta}) \] The derivative of the above equation is: \[2\mathbf{X}^\top (\mathbf{Y} - \mathbf{X} \boldsymbol{\hat{\beta}})=0\] \[\mathbf{X}^\top \mathbf{X} \boldsymbol{\hat{\beta}} = \mathbf{X}^\top \mathbf{Y} \]

The solution is:

\[ \boldsymbol{\hat{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{Y} \]

If we rewrite the equations of the LSE using \(\mathbf{QR}\) instead of \(\mathbf{X}\) we have:

\[(\mathbf{Q}\mathbf{R})^\top (\mathbf{Q}\mathbf{R}) \boldsymbol{\beta} = (\mathbf{Q}\mathbf{R})^\top \mathbf{Y}\] then:

\[\mathbf{R} \boldsymbol{\beta} = \mathbf{Q}^\top \mathbf{Y}\]

We use backsolve which takes advantage of the upper triangular nature of \(\mathbf{R}\).

backsolve(r, x, k=ncol(r), upper.tri=TRUE, transpose=FALSE)
forwardsolve(l, x, k=ncol(l), upper.tri=FALSE, transpose=FALSE)

For instance, we want to get the answer of this equation:

\[\begin{equation} \begin{cases} X_1+4X_2+7X_3=1\\ 2X_1+5X_2+8X_3=2\\ 3X_1+6X_2+9X_3=3\\ \end{cases} \end{equation}\]

that means:

\[ \, \begin{pmatrix} 1&4&7\\ 2&5&8\\ 3&6&9\\ \end{pmatrix} \begin{pmatrix} X_1\\ X_2\\ X_3\\ \end{pmatrix} = \begin{pmatrix} 1\\ 2\\ 3\\ \end{pmatrix} \]

backsolve(matrix(1:9,nrow = 3),c(1,2,3))
# -0.8000000 -0.1333333  0.3333333

A <- matrix(1:9,3,3)
QR <- qr(A)
# $qr
#         [,1]    [,2]       [,3]
# [1,] -3.7417 -8.5524 -1.336e+01
# [2,]  0.5345  1.9640  3.928e+00
# [3,]  0.8018  0.9887  1.776e-15
# 
# $rank
# [1] 2
# 
# $qraux
# [1] 1.267e+00 1.150e+00 1.776e-15
# 
# $pivot
# [1] 1 2 3
# 
# attr(,"class")
# [1] "qr"
qr.Q(QR);qr.R(QR)
#         [,1]    [,2]    [,3]
# [1,] -0.2673  0.8729  0.4082
# [2,] -0.5345  0.2182 -0.8165
# [3,] -0.8018 -0.4364  0.4082
#        [,1]   [,2]       [,3]
# [1,] -3.742 -8.552 -1.336e+01
# [2,]  0.000  1.964  3.928e+00
# [3,]  0.000  0.000  1.776e-15

set.seed(1)
maty <- mat %*% matrix(c(1,2,3,4),4,1) + rnorm(10,sd=1)
QR <- qr(mat)
QR$rank
# 4
backsolve(qr.R(QR), crossprod(qr.Q(QR),maty))
#       [,1]
# [1,] 1.002
# [2,] 2.000
# [3,] 3.000
# [4,] 3.951

## The built-in `solve.qr` function
solve.qr(QR, maty)
#            [,1]
# X         1.002
# X2        2.000
# X3        3.000
# Intercept 3.951

Then, we can find the fitted values:

\[\mathbf{X}\boldsymbol{\hat{\beta}} = (\mathbf{QR})\mathbf{R}^{-1}\mathbf{Q}^\top \mathbf{y}= \mathbf{Q}\mathbf{Q}^\top\mathbf{y} \]

plot(1:50, maty)
fitted <- tcrossprod(qr.Q(QR)) %*% maty
lines(1:50,fitted,col="deeppink")

To obtain the standard errors of the LSE, we note that:

\[(\mathbf{X^\top X})^{-1} = (\mathbf{R^\top Q^\top QR})^{-1} = (\mathbf{R^\top R})^{-1}\]

The function chol2inv is specifically designed to find this inverse.

df <- length(maty) - QR$rank
sigma2 <- sum((maty-fitted)^2)/df
varbeta <- sigma2*chol2inv(qr.R(QR))
SE <- sqrt(diag(varbeta))
## This gives us identical results to the `lm` function
cbind(betahat,SE,summary(lm(maty ~ 0 + mat))$coef[,1:2])

Choleskey

A = UU’

A = diag(3)+1

t(chol(A)) %*% chol(A)
#      [,1] [,2] [,3]
# [1,]    2    1    1
# [2,]    1    2    1
# [3,]    1    1    2
prod(diag(chol(A))^2)
# 4
chol2inv(chol(A))
#       [,1]  [,2]  [,3]
# [1,]  0.75 -0.25 -0.25
# [2,] -0.25  0.75 -0.25
# [3,] -0.25 -0.25  0.75

SVD Factorization

A=UDV’ U’U=V’V=I

A=matrix(1:18,3,6)
A = svd(A)
A$u%*%diag(A$d)%*%t(A$v)
#      [,1] [,2] [,3] [,4] [,5] [,6]
# [1,]    1    4    7   10   13   16
# [2,]    2    5    8   11   14   17
# [3,]    3    6    9   12   15   18

Time Series

library(fMultivar)
tslag(1:9,1:4,trim=F)
tslag(1:9,1:4,trim=T)
as.vector(A[1:min(dim(A)),1:min(dim(A))]) 
#     [,1] [,2] [,3] [,4]
# [1,]   NA   NA   NA   NA
# [2,]    1   NA   NA   NA
# [3,]    2    1   NA   NA
# [4,]    3    2    1   NA
# [5,]    4    3    2    1
# [6,]    5    4    3    2
# [7,]    6    5    4    3
# [8,]    7    6    5    4
# [9,]    8    7    6    5
#    [,1] [,2] [,3] [,4]
# [1,]    4    3    2    1
# [2,]    5    4    3    2
# [3,]    6    5    4    3
# [4,]    7    6    5    4
# [5,]    8    7    6    5
# [1]  1  2  3  5  6  7  9 10 11