let’s grab a random data set from R on motorcars and use it to form a matrix:

A = rbind(mtcars$wt,mtcars$disp,mtcars$mpg)

# centre the rows
for (i in 1:3) A[i,] = A[i,] - mean(A[i,])

# look at the dota
A
##            [,1]       [,2]        [,3]      [,4]       [,5]      [,6]
## [1,]  -0.597250  -0.342250   -0.897250 -0.002250   0.222750  0.242750
## [2,] -70.721875 -70.721875 -122.721875 27.278125 129.278125 -5.721875
## [3,]   0.909375   0.909375    2.709375  1.309375  -1.390625 -1.990625
##            [,7]       [,8]       [,9]      [,10]      [,11]     [,12]
## [1,]   0.352750  -0.027250  -0.067250   0.222750   0.222750  0.852750
## [2,] 129.278125 -84.021875 -89.921875 -63.121875 -63.121875 45.078125
## [3,]  -5.790625   4.309375   2.709375  -0.890625  -2.290625 -3.690625
##          [,13]     [,14]      [,15]      [,16]      [,17]      [,18]
## [1,]  0.512750  0.562750   2.032750   2.206750   2.127750   -1.01725
## [2,] 45.078125 45.078125 241.278125 229.278125 209.278125 -152.02188
## [3,] -2.790625 -4.890625  -9.690625  -9.690625  -5.390625   12.30937
##           [,19]      [,20]       [,21]     [,22]     [,23]      [,24]
## [1,]   -1.60225   -1.38225   -0.752250  0.302750  0.217750   0.622750
## [2,] -155.02188 -159.62188 -110.621875 87.278125 73.278125 119.278125
## [3,]   10.30937   13.80937    1.409375 -4.590625 -4.890625  -6.790625
##           [,25]       [,26]       [,27]      [,28]      [,29]      [,30]
## [1,]   0.627750   -1.282250   -1.077250   -1.70425  -0.047250  -0.447250
## [2,] 169.278125 -151.721875 -110.421875 -135.62188 120.278125 -85.721875
## [3,]  -0.890625    7.209375    5.909375   10.30937  -4.290625  -0.390625
##          [,31]       [,32]
## [1,]  0.352750   -0.437250
## [2,] 70.278125 -109.721875
## [3,] -5.090625    1.309375
# better to plot it
scatterplot3d(t(A), pch = 19)

It’d be better just to look at it in 2D, but which dimensions do we choose? 1 and 2 or 2 and 3 or 1 and 3?

plot(A[1,], A[2,], pch = 19)

plot(A[2,], A[3,], pch = 19)

plot(A[1,], A[3,], pch = 19)

Instead, use axes defined by the first 2 PCs

# compute PCA 
pc = prcomp(t(A))
pc$rotation
##               PC1         PC2          PC3
## [1,]  0.007006108 -0.06695905 -0.997731126
## [2,]  0.999126075  0.04158407  0.004225142
## [3,] -0.041206807  0.99688879 -0.067191878
# get coordinate of first observation in direction of PC1 by taking dot product
t(A[,1]) %*% pc$rotation[,1]
##           [,1]
## [1,] -70.70173
# dont need to do this component by compenet, can use matrix multiplication
proj = t(t(A) %*% pc$rotation)

proj 
##            [,1]         [,2]        [,3]        [,4]        [,5]
## PC1 -70.7017262 -70.69993966 -122.732556 27.20031503 129.2240094
## PC2  -1.9943662  -2.01144078   -2.342250  2.43978731   3.9746967
## PC3   0.2359824  -0.01843907    0.194649  0.02951947   0.4174125
##           [,6]        [,7]        [,8]        [,9]       [,10]       [,11]
## PC1 -5.6331465 129.4062302 -84.1262127 -89.9554059 -63.0284508 -62.9707613
## PC2 -2.2386249  -0.4203186   0.8038209  -1.0338688  -3.5276335  -4.9232778
## PC3 -0.1326211   0.5833517  -0.6173711  -0.4948832  -0.4291007  -0.3350321
##          [,12]      [,13]       [,14]       [,15]       [,16]       [,17]
## PC1 45.1967834 45.1573152 45.24419982 241.4808274 229.4925335 209.3322693
## PC2 -1.8617102 -0.9417442 -3.03855861   0.2367395  -0.2739201   3.1863100
## PC3 -0.4123737 -0.1336178 -0.04240145  -0.3575724  -0.5818793  -0.8764865
##           [,18]        [,19]       [,20]        [,21]      [,22]
## PC1 -152.403376 -155.3224395 -160.061102 -110.5885460 87.3931366
## PC2    6.017504    3.9381453    7.221238   -3.1447475 -0.9672350
## PC3   -0.454462    0.2509191   -0.223189    0.1884516  0.3751521
##          [,23]       [,24]       [,25]        [,26]        [,27]
## PC1 73.4171380 119.4580679 169.1712865 -151.8953403 -110.5764284
## PC2 -1.8427870  -1.8511270   6.1093854    0.9635906    1.3713306
## PC3  0.4209648   0.3389048   0.1487411    0.1538829    0.2111958
##            [,28]       [,29]      [,30]     [,31]        [,32]
## PC1 -135.9401082 120.3494828 -85.633998 70.428947 -109.6830049
## PC2    4.7517060   0.7275416  -3.924127 -2.175956   -3.2281028
## PC3    0.4346554   0.8436301   0.110295  0.287034   -0.1153119
# plot points in space defined by first 2 PCs
plot(proj[1,], proj[2,])