Lecture 4: Matrix rank, low-rank approximation, SVD

Previous lecture

Today's lecture

Matrix and linear spaces

$$ A = [a_1, \ldots, a_m], $$

where $a_i \in \mathbb{C}^{n\times 1}$, $i = 1, \ldots, m$.

$$ y = Ax \quad \Longleftrightarrow \quad y = a_1 x_1 + a_2 x_2 + \ldots +a_m x_m. $$

Linear dependence

Definition. Vectors $a_i$ are called linearly dependent if there exist coefficients $x_i$, not all equal to zero, such that

$$\sum_i a_i x_i = 0,$$

or in the matrix form

$$ Ax = 0, \quad \Vert x \Vert \ne 0. $$

In this case, we say that the matrix $A$ has a non-trivial nullspace (or kernel) denoted by $N(A)$ (or $\text{ker}(A)$).

Vectors that are not linearly dependent are called linearly independent.

Linear (vector) space

A linear space spanned by vectors $\{a_1, \ldots, a_m\}$ is defined as all possible vectors of the form

$$ \mathcal{L}(a_1, \ldots, a_m) = \left\{y: y = \sum_{i=1}^m a_i x_i, \, \forall x_i, \, i=1,\dots, m \right\}. $$

In the matrix form, the linear space is the set of all $y$ such that

$$y = A x.$$

This set is also called the range (or image) of the matrix, denoted by $\text{range}(A)$ (or $\text{im}(A)$) respectively.

Dimension of a linear space

Matrix rank

Theorem
The dimension of the column space of the matrix is equal to the dimension of its row space.

Proof

Full-rank matrix

Suppose we have a linear space spanned by $n$ vectors. Let these vectors be random, with elements drawn from the standard normal distribution $\mathcal{N}(0, 1)$.

Q: What is the probability that this subspace has dimension $m < n$?

A: Zero. A random matrix has full rank with probability 1.
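A quick numerical illustration of this fact (a minimal sketch; the matrix sizes are arbitrary):

```python
import jax
import jax.numpy as jnp

# Draw a random matrix with i.i.d. entries from N(0, 1)
n, m = 100, 60
A = jax.random.normal(jax.random.PRNGKey(0), (n, m))

# With probability 1 its columns are linearly independent,
# so the rank equals min(n, m)
print(jnp.linalg.matrix_rank(A))   # expected: 60
```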

Dimensionality reduction

Johnson–Lindenstrauss lemma

Let $N\gg 1$. Given $0 < \epsilon < 1$, a set of $m$ points in $\mathbb{R}^N$ and a number $n > \frac{8 \log m}{\epsilon^2}$ (we want $n\ll N$).

Then there exists a linear map $f: \mathbb{R}^N \rightarrow \mathbb{R}^n$ such that for every pair of points $u, v$ from the set the following inequality holds:

$$(1 - \epsilon) \Vert u - v \Vert^2 \leq \Vert f(u) - f(v) \Vert^2 \leq (1 + \epsilon) \Vert u - v \Vert^2.$$
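A scaled Gaussian random matrix provides such a map with high probability. Below is a minimal sketch of this; the point set, the sizes $N$, $n$, $m$ and the check are illustrative assumptions, not part of the lemma itself.

```python
import jax
import jax.numpy as jnp

N, n, m = 5000, 500, 30              # ambient dimension, reduced dimension, number of points
key_pts, key_map = jax.random.split(jax.random.PRNGKey(0))
points = jax.random.normal(key_pts, (m, N))

# Random Gaussian projection; the 1/sqrt(n) scaling preserves norms in expectation
Phi = jax.random.normal(key_map, (n, N)) / jnp.sqrt(n)
projected = points @ Phi.T

# Pairwise squared distances before and after the projection
d2 = jnp.sum((points[:, None, :] - points[None, :, :]) ** 2, axis=-1)
d2_proj = jnp.sum((projected[:, None, :] - projected[None, :, :]) ** 2, axis=-1)

mask = ~jnp.eye(m, dtype=bool)       # ignore the zero diagonal
ratio = d2_proj[mask] / d2[mask]
print(ratio.min(), ratio.max())      # stays within [1 - eps, 1 + eps] with high probability
```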

Skeleton decomposition

A very useful representation, closely related to the matrix rank, is the skeleton decomposition. This decomposition explains why and how matrices of low rank can be compressed.

Graphically, $A$ is represented as a product of a few of its columns, a small square matrix and a few of its rows, or in the matrix form

$$ A = C \widehat{A}^{-1} R, $$

where $C$ are some $r=\mathrm{rank}(A)$ columns of $A$, $R$ are some $r$ rows of $A$ and $\widehat{A}$ is the nonsingular $r \times r$ submatrix on their intersection.

Remark

We have not yet formally defined the inverse, so just a reminder: for a nonsingular square matrix $\widehat{A}$, its inverse $\widehat{A}^{-1}$ is the unique matrix satisfying $\widehat{A}\widehat{A}^{-1} = \widehat{A}^{-1}\widehat{A} = I$.

Proof for the skeleton decomposition

Since $\mathrm{rank}(A) = r$, every column $a_i$ of $A$ is a linear combination of the $r$ columns of $C$: $a_i = C x_i$. Restricting this equality to the $r$ selected rows, we get for the $i$-th column $\widehat{r}_i$ of $R$

$$\widehat{r}_i = \widehat{A} x_i \quad \Longrightarrow \quad x_i = \widehat{A}^{-1} \widehat r_i.$$

Thus, $a_i = C\widehat{A}^{-1} \widehat r_i$ for every $i$ and

$$A = [a_1,\dots, a_m] = C\widehat{A}^{-1} R.$$
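A minimal numerical sketch of the skeleton decomposition; the rank-$r$ test matrix and the choice of the first $r$ rows and columns are illustrative assumptions (in general the cross has to be chosen so that $\widehat{A}$ is nonsingular):

```python
import jax
import jax.numpy as jnp

n, m, r = 60, 40, 5
k1, k2 = jax.random.split(jax.random.PRNGKey(0))
A = jax.random.normal(k1, (n, r)) @ jax.random.normal(k2, (r, m))   # exact rank r

# Take the first r rows and columns; for this random matrix the r x r
# submatrix on their intersection is nonsingular with probability 1
C = A[:, :r]                  # n x r columns
R = A[:r, :]                  # r x m rows
A_hat = A[:r, :r]             # r x r submatrix on the intersection

# A = C * A_hat^{-1} * R; solve is used instead of forming the inverse explicitly
A_skeleton = C @ jnp.linalg.solve(A_hat, R)
print(jnp.linalg.norm(A - A_skeleton) / jnp.linalg.norm(A))   # ~ machine precision
```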

A closer look at the skeleton decomposition

$$A = C \widehat{A}^{-1} R,$$

where $C$ is $n \times r$, $R$ is $r \times m$ and $\widehat{A}$ is $r \times r$, or

$$ A = UV, $$

where $U$ and $V$ are not unique, e.g. $U = C \widehat{A}^{-1}$, $V=R$.

In the index form, it is

$$ a_{ij} = \sum_{\alpha=1}^r u_{i \alpha} v_{\alpha j}. $$

For rank 1, we have

$$ a_{ij} = u_i v_j, $$

i.e. it is a separation of indices, and a rank-$r$ matrix is a sum of $r$ rank-$1$ matrices!

Storage

It is interesting to note that for a rank-$r$ matrix

$$A = U V$$

only the factors $U$ and $V$ need to be stored, which gives $(n+m) r$ parameters instead of $nm$, so this representation can be used for compression. We can also compute the matrix-by-vector product $Ax$ much faster:
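For example, $Ax$ can be evaluated as $U(Vx)$, which costs $\mathcal{O}((n+m)r)$ operations instead of $\mathcal{O}(nm)$. A minimal sketch (the shapes and the rank are illustrative):

```python
import jax
import jax.numpy as jnp

n, m, r = 2000, 1500, 10
k1, k2, k3 = jax.random.split(jax.random.PRNGKey(0), 3)
U = jax.random.normal(k1, (n, r))
V = jax.random.normal(k2, (r, m))
x = jax.random.normal(k3, (m,))

y_full = (U @ V) @ x       # forms the full n x m matrix first
y_fast = U @ (V @ x)       # uses only the factors: O((n + m) r) operations
print(jnp.linalg.norm(y_full - y_fast) / jnp.linalg.norm(y_full))   # agree up to rounding
```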

The same works for addition, elementwise multiplication, etc. For addition:

$$ A_1 + A_2 = U_1 V_1 + U_2 V_2 = [U_1|U_2] [V_1^\top|V_2^\top]^\top $$
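In code, the sum is represented by stacking the factors; the resulting rank is at most $r_1 + r_2$ and can be reduced afterwards (e.g. by the SVD discussed below). A small sketch with illustrative sizes:

```python
import jax
import jax.numpy as jnp

k1, k2, k3, k4 = jax.random.split(jax.random.PRNGKey(1), 4)
U1, V1 = jax.random.normal(k1, (100, 3)), jax.random.normal(k2, (3, 80))
U2, V2 = jax.random.normal(k3, (100, 4)), jax.random.normal(k4, (4, 80))

# Stack the factors: the sum has rank at most 3 + 4 = 7
U = jnp.hstack([U1, U2])
V = jnp.vstack([V1, V2])
print(jnp.linalg.norm(U @ V - (U1 @ V1 + U2 @ V2)))   # zero up to rounding
```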

Computing matrix rank

We can also try to compute the matrix rank using the built-in jnp.linalg.matrix_rank function.
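A sketch of such an experiment (the low-rank test matrix and the size of the perturbation are illustrative choices):

```python
import jax
import jax.numpy as jnp

n, r = 50, 5
k1, k2, k3 = jax.random.split(jax.random.PRNGKey(0), 3)
A = jax.random.normal(k1, (n, r)) @ jax.random.normal(k2, (r, n))   # exact rank 5

print(jnp.linalg.matrix_rank(A))        # 5

# A perturbation that is tiny relative to ||A||
E = 1e-3 * jax.random.normal(k3, (n, n))
print(jnp.linalg.matrix_rank(A + E))    # much larger than 5
```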

So, small perturbations might crucially affect the rank!

Instability of the matrix rank

For any rank-$r$ matrix $A$ with $r < \min(m, n)$ and any $\epsilon > 0$ there is a matrix $B$ whose rank is equal to $\min(m, n)$ and

$$ \Vert A - B \Vert = \epsilon. $$

Q: So, does this mean that numerically matrix rank has no meaning? (I.e., small perturbations lead to full rank!)

A: No. We should find a matrix $B$ such that $\|A-B\| \leq \epsilon$ and $B$ has the minimal possible rank. So we can only compute the rank with a given accuracy $\epsilon$. One of the approaches to compute the matrix rank with given accuracy is the SVD.

Low rank approximation

An important problem in many applications is to find a low-rank approximation of a given matrix with given accuracy $\epsilon$ or given rank $r$.
Examples:

These problems can be solved by SVD.

Singular value decomposition

To compute low-rank approximation, we need to compute singular value decomposition (SVD).

Theorem Any matrix $A\in \mathbb{C}^{n\times m}$ can be written as a product of three matrices:

$$ A = U \Sigma V^*, $$

where $U$ is an $n \times n$ unitary matrix, $V$ is an $m \times m$ unitary matrix, and $\Sigma$ is an $n \times m$ diagonal matrix with non-negative diagonal entries $\sigma_1 \geq \sigma_2 \geq \ldots \geq \sigma_{\min(m,n)} \geq 0$, called the singular values.

Proof

The matrix $A^* A$ is Hermitian and positive semidefinite, so it can be diagonalized by a unitary matrix $V$ with non-negative eigenvalues:

$$ V^* A^* A V = \text{diag}(\sigma_1^2,\dots, \sigma_m^2), \quad \sigma_1\geq \sigma_2\geq \dots \geq \sigma_m \geq 0. $$

Let $\sigma_i = 0$ for $i>r$, where $r$ is some integer.
Let $V_r= [v_1, \dots, v_r]$, $\Sigma_r = \text{diag}(\sigma_1, \dots,\sigma_r)$. Hence

$$ V^*_r A^* A V_r = \Sigma_r^2 \quad \Longrightarrow \quad (\Sigma_r^{-1} V_r^* A^*) (A V_r\Sigma_r^{-1} ) = I. $$

As a result, the matrix $U_r = A V_r\Sigma_r^{-1}$ satisfies $U_r^* U_r = I$ and hence has orthonormal columns.
Let us complete $U_r$ with orthonormal columns that are orthogonal to the columns of $U_r$ and denote the resulting unitary matrix by $U$. Then

$$ AV = U \begin{bmatrix} \Sigma_r & 0 \\ 0 & 0 \end{bmatrix}\quad \Longrightarrow \quad U^* A V = \begin{bmatrix}\Sigma_r & 0 \\ 0 & 0 \end{bmatrix}. $$

Since multiplication by non-singular matrices does not change the rank of $A$, we have $r = \text{rank}(A)$.

Corollary 1: $A = \displaystyle{\sum_{\alpha=1}^r} \sigma_\alpha u_\alpha v_\alpha^*$ or elementwise $a_{ij} = \displaystyle{\sum_{\alpha=1}^r} \sigma_\alpha u_{i\alpha} \overline{v}_{j\alpha}$

Corollary 2:

$$\text{ker}(A) = \mathcal{L}\{v_{r+1},\dots,v_m\}, \quad \text{im}(A) = \mathcal{L}\{u_{1},\dots,u_r\},$$

$$\text{ker}(A^*) = \mathcal{L}\{u_{r+1},\dots,u_n\}, \quad \text{im}(A^*) = \mathcal{L}\{v_{1},\dots,v_r\}.$$

Eckart-Young theorem

The best low-rank approximation can be computed by SVD.

Theorem: Let $r < \text{rank}(A)$ and let $A_r = U_r \Sigma_r V_r^*$ be the truncated SVD that keeps the $r$ largest singular values and the corresponding singular vectors. Then

$$ \min_{\text{rank}(B)=r} \|A - B\|_2 = \|A - A_r\|_2 = \sigma_{r+1}. $$

The same holds for $\|\cdot\|_F$, but $\|A - A_r\|_F = \sqrt{\sigma_{r+1}^2 + \dots + \sigma_{\min (n,m)}^2}$.

Proof

Let $B$ be any matrix with $\text{rank}(B) = r$. Its nullspace has dimension at least $m - r$, so it has a nontrivial intersection with the $(r+1)$-dimensional subspace $\mathcal{L}(v_1,\dots,v_{r+1})$; take a unit vector $z$ from this intersection, so that $Bz = 0$, $\|z\|_2 = 1$ and $v_i^* z = 0$ for $i > r+1$. Then

$$ \|A-B\|_2^2 \geq \|(A-B)z\|_2^2 = \|Az\|_2^2 = \| U\Sigma V^* z\|^2_2= \|\Sigma V^* z\|^2_2 = \sum_{i=1}^{n} \sigma_i^2 (v_i^*z)^2 =\sum_{i=1}^{r+1} \sigma_i^2 (v_i^*z)^2 \geq \sigma_{r+1}^2\sum_{i=1}^{r+1} (v_i^*z)^2 = \sigma_{r+1}^2 $$

as $\sigma_1\geq \dots \geq \sigma_{r+1}$ and $$\sum_{i=1}^{r+1} (v_i^*z)^2 = \|V^*z\|_2^2 = \|z\|_2^2 = 1.$$

Main result on low-rank approximation

Corollary: computation of the best rank-$r$ approximation is equivalent to setting $\sigma_{r+1}= 0, \ldots, \sigma_K = 0$, where $K = \min(m, n)$. The error is

$$ \min_{A_r} \Vert A - A_r \Vert_2 = \sigma_{r+1}, \quad \min_{A_r} \Vert A - A_r \Vert_F = \sqrt{\sigma_{r+1}^2 + \dots + \sigma_{K}^2} $$

which is why it is important to look at the decay of the singular values.
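A minimal numerical check of these formulas on a random test matrix (illustrative only):

```python
import jax
import jax.numpy as jnp

A = jax.random.normal(jax.random.PRNGKey(0), (100, 80))
U, s, Vh = jnp.linalg.svd(A, full_matrices=False)

r = 10
A_r = (U[:, :r] * s[:r]) @ Vh[:r, :]                   # truncated SVD = best rank-r approximation

err_2 = jnp.linalg.svd(A - A_r, compute_uv=False)[0]   # spectral norm of the error
err_F = jnp.linalg.norm(A - A_r, 'fro')
print(err_2, s[r])                                     # both equal sigma_{r+1} (up to rounding)
print(err_F, jnp.sqrt(jnp.sum(s[r:] ** 2)))            # both equal sqrt(sum of trailing sigma_i^2)
```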

Computing SVD

Let us go back to the previous example.
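A sketch of what this computation could look like, reusing the perturbed low-rank matrix from the rank example above: the singular values reveal the numerical rank through a sharp drop after the first $r$ values.

```python
import jax
import jax.numpy as jnp

n, r = 50, 5
k1, k2, k3 = jax.random.split(jax.random.PRNGKey(0), 3)
A = jax.random.normal(k1, (n, r)) @ jax.random.normal(k2, (r, n))
A_noisy = A + 1e-3 * jax.random.normal(k3, (n, n))

s = jnp.linalg.svd(A_noisy, compute_uv=False)
print(s[:8])                         # the first 5 values are large, the rest are at the noise level
print(jnp.sum(s > 1e-2 * s[0]))      # numerical rank for relative accuracy 1e-2: expected 5
```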

Separation of variables for 2D functions

We can use SVD to compute approximations of function-related matrices, i.e. the matrices of the form

$$a_{ij} = f(x_i, y_j),$$

where $f$ is a certain function, and $x_i, \quad i = 1, \ldots, n$ and $y_j, \quad j = 1, \ldots, m$ are some one-dimensional grids.
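For example (a sketch with an assumed smooth function $f(x, y) = 1/(1 + x + y)$ on uniform grids), the singular values of such matrices typically decay very fast:

```python
import jax.numpy as jnp

n, m = 200, 150
x = jnp.linspace(0.0, 1.0, n)
y = jnp.linspace(0.0, 1.0, m)

# a_{ij} = f(x_i, y_j) for a smooth function f (an illustrative choice)
A = 1.0 / (1.0 + x[:, None] + y[None, :])

s = jnp.linalg.svd(A, compute_uv=False)
print(s[:10] / s[0])   # rapid decay: an accurate low-rank approximation exists
```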

Function approximation

And 3d plots...

Singular values of a random Gaussian matrix

What is the singular value decay of a random matrix?
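A sketch that computes the singular values of a square Gaussian random matrix (the size is arbitrary); they decay slowly, so such matrices are poorly approximated by low-rank ones:

```python
import jax
import jax.numpy as jnp

A = jax.random.normal(jax.random.PRNGKey(0), (1000, 1000))
s = jnp.linalg.svd(A, compute_uv=False)

r = 100
# Relative Frobenius error of the best rank-100 approximation
rel_err = jnp.sqrt(jnp.sum(s[r:] ** 2) / jnp.sum(s ** 2))
print(rel_err)   # large (around 0.8): even rank 100 captures little of the matrix
```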

Linear factor analysis & low-rank

Consider a linear factor model,

$$y = Ax, $$

where $y$ is a vector of length $n$, and $x$ is a vector of length $r$.
The data is organized as samples: we observe vectors

$$y_1, \ldots, y_T,$$

but do not know the matrix $A$. Then the factor model can be written as

$$ Y = AX, $$

where $Y$ is $n \times T$, $A$ is $n \times r$ and $X$ is $r \times T$.
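If the data exactly follows the factor model, the rank of $Y$ is at most $r$, and a basis of the column space of $A$ can be recovered from the left singular vectors of $Y$. A sketch under these idealized assumptions (with noise one would truncate at the numerical rank instead):

```python
import jax
import jax.numpy as jnp

n, r, T = 100, 4, 500
k1, k2 = jax.random.split(jax.random.PRNGKey(0))
A = jax.random.normal(k1, (n, r))       # unknown mixing matrix
X = jax.random.normal(k2, (r, T))       # unknown factors
Y = A @ X                               # observed data, samples as columns

U, s, Vh = jnp.linalg.svd(Y, full_matrices=False)
print(jnp.sum(s > 1e-3 * s[0]))         # numerical rank = number of factors (4)
U_r = U[:, :r]                          # orthonormal basis of the column space of A
```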

Applications of SVD

Dense matrix compression

Dense matrices typically require $N^2$ elements to be stored. A rank-$r$ approximation can reduce this number to $\mathcal{O}(Nr)$.

Compression of parameters in fully-connected neural networks

Active Subspaces

$$ f(x) \approx g(W x). $$

How to discover Active Subspaces:

Using SVD:

  1. Choose $m$, the number of Monte Carlo samples. The larger $m$ is, the more accurate the result.
  2. Draw samples $\lbrace x_i \rbrace_{i = 1}^{m}$ from $\mathcal{X}$ (according to some prior probability density function)
  3. For each $x_i$ compute $\nabla f(x_i)$
  4. Compute the SVD of the matrix
$$ G := \dfrac{1}{\sqrt{m}} \begin{bmatrix} \nabla f(x_1) & \nabla f(x_2) & \ldots & \nabla f(x_m) \end{bmatrix} \approx U \Sigma V^\top. $$
  5. Estimate the rank of $G \approx U_r \Sigma_r V_r^\top$. The rank $r$ of the matrix $G$ is the dimensionality of the active subspace.
  6. Low-dimensional vectors are estimated as $x_{\text{AS}} = U_r^\top x$.

For further details, see the book "Active Subspaces: Emerging Ideas in Dimension Reduction for Parameter Studies" (2015) by Paul Constantine.
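A minimal sketch of this recipe for an assumed test function $f(x) = \sin(w^\top x)$, whose active subspace is one-dimensional (spanned by $w$):

```python
import jax
import jax.numpy as jnp

d, m = 10, 200                          # input dimension, number of Monte Carlo samples
k1, k2 = jax.random.split(jax.random.PRNGKey(0))
w = jax.random.normal(k1, (d,))

f = lambda x: jnp.sin(x @ w)            # f varies only along w, so the active subspace is span{w}
xs = jax.random.normal(k2, (m, d))      # samples drawn from the prior

grads = jax.vmap(jax.grad(f))(xs)       # gradients at all samples, shape (m, d)
G = grads.T / jnp.sqrt(m)               # columns are grad f(x_i) / sqrt(m)

U, s, Vh = jnp.linalg.svd(G, full_matrices=False)
print(s[:3])                            # one dominant singular value, so r = 1
x_as = xs @ U[:, :1]                    # low-dimensional coordinates U_r^T x for each sample
```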

Take home message

Next lecture

Questions?