let’s grab a random data set from R on motorcars and use it to form a matrix:
A = rbind(mtcars$wt,mtcars$disp,mtcars$mpg)
# centre the rows
for (i in 1:3) A[i,] = A[i,] - mean(A[i,])
# look at the dota
A
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.597250 -0.342250 -0.897250 -0.002250 0.222750 0.242750
## [2,] -70.721875 -70.721875 -122.721875 27.278125 129.278125 -5.721875
## [3,] 0.909375 0.909375 2.709375 1.309375 -1.390625 -1.990625
## [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 0.352750 -0.027250 -0.067250 0.222750 0.222750 0.852750
## [2,] 129.278125 -84.021875 -89.921875 -63.121875 -63.121875 45.078125
## [3,] -5.790625 4.309375 2.709375 -0.890625 -2.290625 -3.690625
## [,13] [,14] [,15] [,16] [,17] [,18]
## [1,] 0.512750 0.562750 2.032750 2.206750 2.127750 -1.01725
## [2,] 45.078125 45.078125 241.278125 229.278125 209.278125 -152.02188
## [3,] -2.790625 -4.890625 -9.690625 -9.690625 -5.390625 12.30937
## [,19] [,20] [,21] [,22] [,23] [,24]
## [1,] -1.60225 -1.38225 -0.752250 0.302750 0.217750 0.622750
## [2,] -155.02188 -159.62188 -110.621875 87.278125 73.278125 119.278125
## [3,] 10.30937 13.80937 1.409375 -4.590625 -4.890625 -6.790625
## [,25] [,26] [,27] [,28] [,29] [,30]
## [1,] 0.627750 -1.282250 -1.077250 -1.70425 -0.047250 -0.447250
## [2,] 169.278125 -151.721875 -110.421875 -135.62188 120.278125 -85.721875
## [3,] -0.890625 7.209375 5.909375 10.30937 -4.290625 -0.390625
## [,31] [,32]
## [1,] 0.352750 -0.437250
## [2,] 70.278125 -109.721875
## [3,] -5.090625 1.309375
# better to plot it
scatterplot3d(t(A), pch = 19)
It’d be better just to look at it in 2D, but which dimensions do we choose? 1 and 2 or 2 and 3 or 1 and 3?
plot(A[1,], A[2,], pch = 19)
plot(A[2,], A[3,], pch = 19)
plot(A[1,], A[3,], pch = 19)
Instead, use axes defined by the first 2 PCs
# compute PCA
pc = prcomp(t(A))
pc$rotation
## PC1 PC2 PC3
## [1,] 0.007006108 -0.06695905 -0.997731126
## [2,] 0.999126075 0.04158407 0.004225142
## [3,] -0.041206807 0.99688879 -0.067191878
# get coordinate of first observation in direction of PC1 by taking dot product
t(A[,1]) %*% pc$rotation[,1]
## [,1]
## [1,] -70.70173
# dont need to do this component by compenet, can use matrix multiplication
proj = t(t(A) %*% pc$rotation)
proj
## [,1] [,2] [,3] [,4] [,5]
## PC1 -70.7017262 -70.69993966 -122.732556 27.20031503 129.2240094
## PC2 -1.9943662 -2.01144078 -2.342250 2.43978731 3.9746967
## PC3 0.2359824 -0.01843907 0.194649 0.02951947 0.4174125
## [,6] [,7] [,8] [,9] [,10] [,11]
## PC1 -5.6331465 129.4062302 -84.1262127 -89.9554059 -63.0284508 -62.9707613
## PC2 -2.2386249 -0.4203186 0.8038209 -1.0338688 -3.5276335 -4.9232778
## PC3 -0.1326211 0.5833517 -0.6173711 -0.4948832 -0.4291007 -0.3350321
## [,12] [,13] [,14] [,15] [,16] [,17]
## PC1 45.1967834 45.1573152 45.24419982 241.4808274 229.4925335 209.3322693
## PC2 -1.8617102 -0.9417442 -3.03855861 0.2367395 -0.2739201 3.1863100
## PC3 -0.4123737 -0.1336178 -0.04240145 -0.3575724 -0.5818793 -0.8764865
## [,18] [,19] [,20] [,21] [,22]
## PC1 -152.403376 -155.3224395 -160.061102 -110.5885460 87.3931366
## PC2 6.017504 3.9381453 7.221238 -3.1447475 -0.9672350
## PC3 -0.454462 0.2509191 -0.223189 0.1884516 0.3751521
## [,23] [,24] [,25] [,26] [,27]
## PC1 73.4171380 119.4580679 169.1712865 -151.8953403 -110.5764284
## PC2 -1.8427870 -1.8511270 6.1093854 0.9635906 1.3713306
## PC3 0.4209648 0.3389048 0.1487411 0.1538829 0.2111958
## [,28] [,29] [,30] [,31] [,32]
## PC1 -135.9401082 120.3494828 -85.633998 70.428947 -109.6830049
## PC2 4.7517060 0.7275416 -3.924127 -2.175956 -3.2281028
## PC3 0.4346554 0.8436301 0.110295 0.287034 -0.1153119
# plot points in space defined by first 2 PCs
plot(proj[1,], proj[2,])