R Matrix
Introduction
Matrix is just a table of fancy numbers and notations, here let’s see how we manipulate it with R and Python.
Matrix
In R, A matrix is the special case of a two-dimensional array.
Note:
- ncol == number of column
- nrow == number of row
- byrow == by row sequences
- dimnames == names of rownames & colnames
m <- matrix(1:6, nrow = 3, ncol = 2) #== m <- matrix(1:6, 3) dimnames(m) <- list(c("a", "b","c"), c("d", "e"));m # d e # a 1 4 # b 2 5 # c 3 6 dim(m) <- c(2,3);m # 1 3 5 # 2 4 6 dim(m) <- 6;m # 1 2 3 4 5 6 |
X = matrix(1:20, nrow=5, byrow=T, dimnames=list(c("A","B","C","D","E"),c("boy","girl","man","woman"))) # boy girl man woman # A 1 2 3 4 # B 5 6 7 8 # C 9 10 11 12 # D 13 14 15 16 # E 17 18 19 20 X[,2] X[,"girl"] # A B C D E # 2 6 10 14 18 X["B",] X[2,] # boy girl man woman # 5 6 7 8 X[[6]] X[1,2] # 2 X[2,4] X[[(4-1)*nrow(X) + 2]] # 12 X[1, 2] # girl # A 2 X[1, 2,drop = T] # 2 attributes(X) # $dim # [1] 5 4 # # $dimnames # $dimnames[[1]] # [1] "A" "B" "C" "D" "E" # # $dimnames[[2]] # [1] "boy" "girl" "man" "woman" dim(X) attributes(X)$dim # 5 4 nrow(X);ncol(X) # 5 # 4 dimnames(X)[[1]];dimnames(X)[[2]] attributes(X)$dimnames[[1]];attributes(X)$dimnames[[2]] attr(X,"dimnames")[[1]];attr(X,"dimnames")[[2]] # "A" "B" "C" "D" "E" # "boy" "girl" "man" "woman" rownames(X) # "A" "B" "C" "D" "E" colnames(X) # "boy" "girl" "man" "woman" names(X) # !!! NULL length(X) # 20 nchar(X) # boy girl man woman # A 1 1 1 1 # B 1 1 1 1 # C 1 2 2 2 # D 2 2 2 2 # E 2 2 2 2 rowSums(X) rowSums = apply(X, 1, sum) # A B C D E # 10 26 42 58 74 rowMeans(X) rowMeans = apply(X, 1, mean) # A B C D E # 2.5 6.5 10.5 14.5 18.5 colSums(X) colSums = apply(X, 2, sum) # boy girl man woman # 45 50 55 60 colMeans(X) colMeans = apply(X, 2, mean) # boy girl man woman # 9 10 11 12 colSds = apply(X, 2, sd) # boy girl man woman # 6.324555 6.324555 6.324555 6.324555 str(X) # int [1:5, 1:4] 1 5 9 13 17 2 6 10 14 18 ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:5] "A" "B" "C" "D" ... # ..$ : chr [1:4] "boy" "girl" "man" "woman" |
Array
matrix just is a special array
identical(array(1:9,dim=(c(3,3))), matrix(1:9,3)) # TRUE dat = data.frame(boy = X[,1], girl=X[,2], man=X[,3], woman=X[,4]) rownames(dat) <- letters[1:nrow(dat)] data.matrix(dat) # boy girl man woman # a 1 2 3 4 # b 5 6 7 8 # c 9 10 11 12 # d 13 14 15 16 # e 17 18 19 20 array(X, c(2,5,2)) # , , 1 # # [,1] [,2] [,3] [,4] [,5] # [1,] 1 9 17 6 14 # [2,] 5 13 2 10 18 # # , , 2 # # [,1] [,2] [,3] [,4] [,5] # [1,] 3 11 19 8 16 # [2,] 7 15 4 12 20 as.vector(X) # [1] 1 5 9 13 17 2 6 10 14 18 3 7 11 15 19 4 8 12 16 20 X[] <- 0 identical(X, array(0,c(1,5,4))) # TRUE Y <- 1:40 dim(Y) <- c(2,5,4) Y[1, 2, 1] # 3 Y[1, 1:2, 2:1] # [,1] [,2] # [1,] 11 1 # [2,] 13 3 Y[1, , ] # [,1] [,2] [,3] [,4] # [1,] 1 11 21 31 # [2,] 3 13 23 33 # [3,] 5 15 25 35 # [4,] 7 17 27 37 # [5,] 9 19 29 39 Y[ , 2, ] # [,1] [,2] [,3] [,4] # [1,] 3 13 23 33 # [2,] 4 14 24 34 Y[ , , 2] # [,1] [,2] [,3] [,4] [,5] # [1,] 11 13 15 17 19 # [2,] 12 14 16 18 20 Y[matrix(c(1,1,1,1,2,1,1,3,1,1,4,1,2,5,4), ncol=3, byrow=T)] # 1 3 5 7 40 |
rbind & cbind
x <- 1:3 y <- 10:12 cbind(x,y) # x y # [1,] 1 10 # [2,] 2 11 # [3,] 3 12 rbind(x,y) # [,1] [,2] [,3] # x 1 2 3 # y 10 11 12 rbind(x,z=0) # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] # x 1 3 2 7 4 8 5 8 10 4 # z 0 0 0 0 0 0 0 0 0 0 list(row = rbind(rowSums(X),rowMeans(X)),col =rbind(colSums(X),colMeans(X))) # $row # A B C D E # rowSums 10.0 26.0 42.0 58.0 74.0 # rowMeans 2.5 6.5 10.5 14.5 18.5 # # $col # boy girl man woman # colSums 45 50 55 60 # colMeans 9 10 11 12 |
Diagnal & Transposed matrix
m <- matrix(1:9, nrow = 3, ncol = 3) # 1 4 7 # 2 5 8 # 3 6 9 diag(m) # 1 5 9 diag(m)=c(0,0,1) # 0 4 7 # 2 0 8 # 3 6 1 diag(c(1,2,3)) # 1 0 0 # 0 2 0 # 0 0 3 diag(4) # 1 0 0 0 # 0 1 0 0 # 0 0 1 0 # 0 0 0 1 t(m) # 0 2 3 # 4 0 6 # 7 8 1 m[lower.tri(m)] # 2 3 6 m[upper.tri(m)]=0 # 0 0 0 # 2 0 0 # 3 6 1 |
Operation
x <- 1:6 y <- seq(2, by=2, length = 6) x/y # [1] 0.5 0.5 0.5 0.5 0.5 0.5 matrix(x) # [,1] # [1,] 1 # [2,] 2 # [3,] 3 # [4,] 4 # [5,] 5 # [6,] 6 x + matrix(1:12, nrow = 3) # [,1] [,2] [,3] [,4] # [1,] 2 8 8 14 # [2,] 4 10 10 16 # [3,] 6 12 12 18 x*y # [1] 2 8 18 32 50 72 (z <- x %*% y) # inner product # [,1] # [1,] 182 drop(z) # 182 x %*% t(y) x %o% y #== outer(x,y) # [,1] [,2] [,3] [,4] [,5] [,6] # [1,] 2 4 6 8 10 12 # [2,] 4 8 12 16 20 24 # [3,] 6 12 18 24 30 36 # [4,] 8 16 24 32 40 48 # [5,] 10 20 30 40 50 60 # [6,] 12 24 36 48 60 72 x = matrix(1:4,nrow=2) y x %*% x # [,1] [,2] # [1,] 7 15 # [2,] 10 22 det(x) # -2 crossprod(x, x) #== t(x)%*%x # [,1] [,2] # [1,] 5 11 # [2,] 11 25 tcrossprod(x) #== x%*%t(x) # [,1] [,2] # [1,] 10 14 # [2,] 14 20 pow.matrix <- function(mat, n = 1) { p = mat while (n > 1) { p <- mat %*% p n = n - 1 } p } pow.matrix(x,5) # 1069 2337 # 1558 3406 y = matrix(1:8, nrow=2) kronecker(x,y) # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] # [1,] 1 3 5 7 3 9 15 21 # [2,] 2 4 6 8 6 12 18 24 # [3,] 2 6 10 14 4 12 20 28 # [4,] 4 8 12 16 8 16 24 32 |
Inverse
set.seed(0) A = matrix(round(rnorm(9),3),nrow=3,ncol=3) # [,1] [,2] [,3] # [1,] 1.263 1.272 -0.929 # [2,] -0.326 0.415 -0.295 # [3,] 1.330 -1.540 -0.006 solve(A) # [,1] [,2] [,3] # [1,] 0.44257596 -1.393536 -0.009974648 # [2,] 0.38203629 -1.189780 -0.654421208 # [3,] 0.04835694 -3.523613 -0.909603614 round(A%*%solve(A)) # [,1] [,2] [,3] # [1,] 1 0 0 # [2,] 0 1 0 # [3,] 0 0 1 |
## NOTE: solve is not a stable solution seqs <- seq(1,1000,length=50) mat <- matrix(c(seqs,seqs^2,seqs^3,rep(1,50)),ncol = 4) colnames(mat) <- c("X","X2","X3","Intercept") det(crossprod(mat)) # 1.291e+36 solve(crossprod(mat)) ## Error in solve.default(crossprod(mat)) : system is computationally singular: reciprocal condition number = 4.70082e-19 ## we can use the QR factorization to solve this problem |
Eigen
library(LoopAnalyst) A = matrix(1:9,nrow=3) make.adjoint(A) # [,1] [,2] [,3] # [1,] -3 6 -3 # [2,] 6 -12 6 # [3,] -3 6 -3 eigen(A) # eigen() decomposition # $values # [1] 1.4186175+0.0000000i -0.5568088+0.3363604i -0.5568088-0.3363604i # # $vectors # [,1] [,2] [,3] # [1,] 0.4934459+0i 0.2008221+0.1603767i 0.2008221-0.1603767i # [2,] 0.6188857+0i 0.3058970-0.2288351i 0.3058970+0.2288351i # [3,] -0.6111396+0i 0.8877000+0.0000000i 0.8877000+0.0000000i # a %*% x = b(I) |
Useful Functions
outer(month.abb, 2017:2018, FUN = "paste") # [,1] [,2] # [1,] "Jan 2017" "Jan 2018" # [2,] "Feb 2017" "Feb 2018" # [3,] "Mar 2017" "Mar 2018" # [4,] "Apr 2017" "Apr 2018" # [5,] "May 2017" "May 2018" # [6,] "Jun 2017" "Jun 2018" # [7,] "Jul 2017" "Jul 2018" # [8,] "Aug 2017" "Aug 2018" # [9,] "Sep 2017" "Sep 2018" # [10,] "Oct 2017" "Oct 2018" # [11,] "Nov 2017" "Nov 2018" # [12,] "Dec 2017" "Dec 2018" outer(1:5,5:1,"+") # [,1] [,2] [,3] [,4] [,5] # [1,] 6 5 4 3 2 # [2,] 7 6 5 4 3 # [3,] 8 7 6 5 4 # [4,] 9 8 7 6 5 # [5,] 10 9 8 7 6 hil <- function(n){ i <- 1:n outer(i-1,i,"+") } hil(5) # [,1] [,2] [,3] [,4] [,5] # [1,] 1 2 3 4 5 # [2,] 2 3 4 5 6 # [3,] 3 4 5 6 7 # [4,] 4 5 6 7 8 # [5,] 5 6 7 8 9 |
Advanced Matrix
Moore-Penrose
# AA+A=A A%*%ginv(A)%*%A # A+AA+=A+ ginv(A)%*%A%*%ginv(A) # (AA+)T=AA+ A%*%ginv(A) t(A%*%ginv(A)) symmetric # (A+A)T=A+A ginv(A)%*%A t(ginv(A)%*%A) symmetric |
library(MASS) A=matrix(1:12,nrow=3,ncol=4) ginv(A) # [,1] [,2] [,3] # [1,] -0.483333333 -0.03333333 0.41666667 # [2,] -0.244444444 -0.01111111 0.22222222 # [3,] -0.005555556 0.01111111 0.02777778 # [4,] 0.233333333 0.03333333 -0.16666667 |
QR Factorization
The QR factorization tells us that we can decompose any full rank \(N\times p\) matrix \(\mathbf{X}\) as:
\[\mathbf{X = QR}\]
with:
- \(\mathbf{Q}\) a \(N \times p\) matrix with \(\mathbf{Q^\top Q=I}\)
- \(\mathbf{R}\) a \(p \times p\) upper triangular matrix.
If we want to calculate the LSE, you can use the QR decomposition, its the more stable method.
WHY:
Supposed we need to solve the minimum RSS(Residual Sum of Squares):
\[ (\mathbf{Y}-\mathbf{X}\boldsymbol{\beta})^\top (\mathbf{Y}-\mathbf{X}\boldsymbol{\beta}) \] The derivative of the above equation is: \[2\mathbf{X}^\top (\mathbf{Y} - \mathbf{X} \boldsymbol{\hat{\beta}})=0\] \[\mathbf{X}^\top \mathbf{X} \boldsymbol{\hat{\beta}} = \mathbf{X}^\top \mathbf{Y} \]
The solution is:
\[ \boldsymbol{\hat{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{Y} \]
If we rewrite the equations of the LSE using \(\mathbf{QR}\) instead of \(\mathbf{X}\) we have:
\[(\mathbf{Q}\mathbf{R})^\top (\mathbf{Q}\mathbf{R}) \boldsymbol{\beta} = (\mathbf{Q}\mathbf{R})^\top \mathbf{Y}\] then:
\[\mathbf{R} \boldsymbol{\beta} = \mathbf{Q}^\top \mathbf{Y}\]
We use backsolve which takes advantage of the upper triangular nature of \(\mathbf{R}\).
- backsolve(r, x, k=ncol(r), upper.tri=TRUE, transpose=FALSE)
- forwardsolve(l, x, k=ncol(l), upper.tri=FALSE, transpose=FALSE)
For instance, we want to get the answer of this equation:
\[\begin{equation} \begin{cases} X_1+4X_2+7X_3=1\\ 2X_1+5X_2+8X_3=2\\ 3X_1+6X_2+9X_3=3\\ \end{cases} \end{equation}\]that means:
\[ \, \begin{pmatrix} 1&4&7\\ 2&5&8\\ 3&6&9\\ \end{pmatrix} \begin{pmatrix} X_1\\ X_2\\ X_3\\ \end{pmatrix} = \begin{pmatrix} 1\\ 2\\ 3\\ \end{pmatrix} \]
backsolve(matrix(1:9,nrow = 3),c(1,2,3)) # -0.8000000 -0.1333333 0.3333333 |
A <- matrix(1:9,3,3) QR <- qr(A) # $qr # [,1] [,2] [,3] # [1,] -3.7417 -8.5524 -1.336e+01 # [2,] 0.5345 1.9640 3.928e+00 # [3,] 0.8018 0.9887 1.776e-15 # # $rank # [1] 2 # # $qraux # [1] 1.267e+00 1.150e+00 1.776e-15 # # $pivot # [1] 1 2 3 # # attr(,"class") # [1] "qr" qr.Q(QR);qr.R(QR) # [,1] [,2] [,3] # [1,] -0.2673 0.8729 0.4082 # [2,] -0.5345 0.2182 -0.8165 # [3,] -0.8018 -0.4364 0.4082 # [,1] [,2] [,3] # [1,] -3.742 -8.552 -1.336e+01 # [2,] 0.000 1.964 3.928e+00 # [3,] 0.000 0.000 1.776e-15 |
set.seed(1) maty <- mat %*% matrix(c(1,2,3,4),4,1) + rnorm(10,sd=1) QR <- qr(mat) QR$rank # 4 backsolve(qr.R(QR), crossprod(qr.Q(QR),maty)) # [,1] # [1,] 1.002 # [2,] 2.000 # [3,] 3.000 # [4,] 3.951 ## The built-in `solve.qr` function solve.qr(QR, maty) # [,1] # X 1.002 # X2 2.000 # X3 3.000 # Intercept 3.951 |
Then, we can find the fitted values:
\[\mathbf{X}\boldsymbol{\hat{\beta}} = (\mathbf{QR})\mathbf{R}^{-1}\mathbf{Q}^\top \mathbf{y}= \mathbf{Q}\mathbf{Q}^\top\mathbf{y} \]
plot(1:50, maty) fitted <- tcrossprod(qr.Q(QR)) %*% maty lines(1:50,fitted,col="deeppink") |
To obtain the standard errors of the LSE, we note that:
\[(\mathbf{X^\top X})^{-1} = (\mathbf{R^\top Q^\top QR})^{-1} = (\mathbf{R^\top R})^{-1}\]
The function chol2inv is specifically designed to find this inverse.
df <- length(maty) - QR$rank sigma2 <- sum((maty-fitted)^2)/df varbeta <- sigma2*chol2inv(qr.R(QR)) SE <- sqrt(diag(varbeta)) ## This gives us identical results to the `lm` function cbind(betahat,SE,summary(lm(maty ~ 0 + mat))$coef[,1:2]) |
Choleskey
A = UU’
A = diag(3)+1 t(chol(A)) %*% chol(A) # [,1] [,2] [,3] # [1,] 2 1 1 # [2,] 1 2 1 # [3,] 1 1 2 prod(diag(chol(A))^2) # 4 chol2inv(chol(A)) # [,1] [,2] [,3] # [1,] 0.75 -0.25 -0.25 # [2,] -0.25 0.75 -0.25 # [3,] -0.25 -0.25 0.75 |
SVD Factorization
A=UDV’ U’U=V’V=I
A=matrix(1:18,3,6) A = svd(A) A$u%*%diag(A$d)%*%t(A$v) # [,1] [,2] [,3] [,4] [,5] [,6] # [1,] 1 4 7 10 13 16 # [2,] 2 5 8 11 14 17 # [3,] 3 6 9 12 15 18 |
Time Series
library(fMultivar) tslag(1:9,1:4,trim=F) tslag(1:9,1:4,trim=T) as.vector(A[1:min(dim(A)),1:min(dim(A))]) # [,1] [,2] [,3] [,4] # [1,] NA NA NA NA # [2,] 1 NA NA NA # [3,] 2 1 NA NA # [4,] 3 2 1 NA # [5,] 4 3 2 1 # [6,] 5 4 3 2 # [7,] 6 5 4 3 # [8,] 7 6 5 4 # [9,] 8 7 6 5 # [,1] [,2] [,3] [,4] # [1,] 4 3 2 1 # [2,] 5 4 3 2 # [3,] 6 5 4 3 # [4,] 7 6 5 4 # [5,] 8 7 6 5 # [1] 1 2 3 5 6 7 9 10 11 |





