import random
import jax.numpy as jnp
import jax
from jax.config import config
config.update("jax_enable_x64", True)
#c = random.random()
#print(c)
# Division accuracy demo: divide c by a tiny float32 value, multiply back,
# and report the relative error of the round trip.
c = jnp.float32(0.925924589693)
print(c)
# 1e-50 is smaller than any positive float32, so the quotient below is not
# finite (the recorded run printed inf for b and nan for the relative error).
a = jnp.float32(1e-50)
b = jnp.float32(c / a)
print(f'{b:10.16f}')
rel_err = abs(a * b - c) / abs(c)
print(rel_err)

0.9259246
       inf
nan


# Square-root accuracy demo: take sqrt(a), square it, and report the relative
# error of the round trip. 1e-40 is below the smallest normal float32, and the
# recorded run printed nan for the relative error.
a = jnp.float32(1e-40)
b = jnp.sqrt(a)
print(b.dtype)
rel_err = abs(b * b - a) / abs(a)
print(f'{rel_err:10.64f}')

float32
       nan


# Exponent accuracy demo: compare log(exp(a)) with a.
# 1e99 exceeds the float32 range, so a (and therefore exp(a)) is inf and the
# printed difference is inf - inf = nan.
a = jnp.float32(1e99)
b = jnp.exp(a)
print(b.dtype)
round_trip_gap = jnp.log(b) - a
print(round_trip_gap)

float32
nan


import math

import jax.numpy as jnp
import numpy as np
import jax
from numba import jit as numba_jit

# Test vector for the summation experiments: a single 1 followed by n - 1
# copies of a tiny value, all stored in float32.
n = 10 ** 7
sm = 1e-10
x = jnp.ones(n, dtype=jnp.float32) * sm
x = x.at[0].set(1)
# Reference value computed in Python double precision from the nominal
# (unrounded) value of sm.
true_sum = 1.0 + (n - 1)*sm
approx_sum = jnp.sum(x)
# math.fsum walks its argument element by element in Python. Hand it plain
# Python floats copied to the host in one go; iterating the JAX array directly
# creates one device scalar per element and is far slower for n = 10**7.
# float32 -> Python float is exact, so the result is unchanged.
math_fsum = math.fsum(np.asarray(x).tolist())


@jax.jit
def dumb_sum(x):
    """Sum the entries of ``x`` one by one, in index order.

    The running total starts as a float32 zero and each element ``x[i]`` is
    added to it in turn inside a ``jax.lax.fori_loop``; no error compensation
    is applied.

    Args:
        x: one-dimensional array to be summed.

    Returns:
        The accumulated total after the last element has been added.
    """
    def add_next(idx, running):
        return running + x[idx]

    return jax.lax.fori_loop(0, len(x), add_next, jnp.float32(0.0))


@numba_jit(nopython=True)
def kahan_sum_numba(x):
    """Kahan (compensated) summation of ``x``, compiled with Numba.

    Both the running total and the compensation term start as float32 zeros.
    At every step the compensation is subtracted from the incoming element
    before it is added, and the rounding error of that addition is recovered
    as ``(new_total - total) - corrected`` for use in the next step.

    Args:
        x: indexable sequence of numbers supporting ``len``.

    Returns:
        The compensated running total after the last element.
    """
    total = np.float32(0.0)
    comp = np.float32(0.0)
    for k in range(len(x)):
        corrected = x[k] - comp
        updated = total + corrected
        # Order matters: this recovers the low-order bits lost in `updated`.
        comp = (updated - total) - corrected
        total = updated
    return total

@jax.jit
def kahan_sum_jax(x):
    """Kahan (compensated) summation of ``x`` written with ``jax.lax.fori_loop``.

    The loop carry is the pair ``(total, compensation)``, both starting as
    float32 zeros. Each step subtracts the compensation from ``x[i]``, adds the
    corrected value to the total, and stores the rounding error of that
    addition as the new compensation.

    Args:
        x: one-dimensional array to be summed.

    Returns:
        The compensated total; the final compensation term is discarded.
    """
    def kahan_step(idx, carry):
        total, comp = carry
        corrected = x[idx] - comp
        updated = total + corrected
        # Order matters: this recovers the low-order bits lost in `updated`.
        return updated, (updated - total) - corrected

    zero = jnp.float32(0.0)
    total, _ = jax.lax.fori_loop(0, len(x), kahan_step, (zero, zero))
    return total

# Run every summation variant on the same vector and report the signed
# deviation of each result from true_sum.
# The Numba kernel is given a NumPy copy of the JAX array.
k_sum_numba = kahan_sum_numba(np.array(x))
k_sum_jax = kahan_sum_jax(x)
d_sum = dumb_sum(x)
print(f'Error in np sum: {approx_sum - true_sum:3.1e}')
print(f'Error in Kahan sum Numba: {k_sum_numba - true_sum:3.1e}')
print(f'Error in Kahan sum JAX: {k_sum_jax - true_sum:3.1e}')
print(f'Error in dumb sum: {d_sum - true_sum:3.1e}')
print(f'Error in math fsum: {math_fsum - true_sum:3.1e}')

Error in np sum: -8.3e-07
Error in Kahan sum Numba: 4.7e-08
Error in Kahan sum JAX: 0.0e+00
Error in dumb sum: -1.0e-03
Error in math fsum: 1.3e-11


import math
# Cancellation example: the two 1's are swamped by +/-1e20 unless the sum is
# tracked exactly.
test_list = [1, 1e20, 1, -1e20]
# math.fsum tracks the partial sums exactly and recovers 2.0.
exact_total = math.fsum(test_list)
print(exact_total)
# float32 summation in JAX.
single_precision = jnp.array(test_list, dtype=jnp.float32)
print(jnp.sum(single_precision))
# Plain left-to-right double-precision arithmetic.
print(1 + 1e20 + 1 - 1e20)

2.0
0.0
0.0


# Perturb the all-ones vector with Gaussian noise scaled by 1e-3 and measure
# the relative size of the perturbation in three different norms.
n = 100
a = jnp.ones(n)
noise = jax.random.normal(jax.random.PRNGKey(0), (n,))
b = a + 1e-3 * noise
diff = a - b
rel_l1 = jnp.linalg.norm(diff, 1) / jnp.linalg.norm(b, 1)
rel_l2 = jnp.linalg.norm(diff) / jnp.linalg.norm(b)
rel_cheb = jnp.linalg.norm(diff, jnp.inf) / jnp.linalg.norm(b, jnp.inf)
print('Relative error in L1 norm:', rel_l1)
print('Relative error in L2 norm:', rel_l2)
print('Relative error in Chebyshev norm:', rel_cheb)

Relative error in L1 norm: 0.0008608277121789923
Relative error in L2 norm: 0.0010668749128008221
Relative error in Chebyshev norm: 0.0025285461541625647


%matplotlib inline
import matplotlib.pyplot as plt
p = 0.5 # Which norm do we use
M = 4000 # Number of sampling points
# Draw one 2-D standard-normal point per seed 0..M-1. Each draw has shape
# (1, 2), so the point is always row 0 of the draw; indexing that draw with
# the loop counter (as `a[i, :]`) is out of bounds for every seed except 0.
# The per-seed draws are batched with vmap instead of an eager Python loop.
seeds = jnp.arange(M)
keys = jax.vmap(jax.random.PRNGKey)(seeds)
samples = jax.vmap(lambda key: jax.random.normal(key, (1, 2)))(keys)[:, 0, :]
# Keep only the points whose p-"norm" does not exceed 1; b has shape (k, 2).
inside = jnp.linalg.norm(samples, p, axis=1) <= 1
b = samples[inside]
# Scatter the accepted points; equal axis scaling keeps the shape undistorted.
xs, ys = b[:, 0], b[:, 1]
plt.plot(xs, ys, '.')
plt.axis('equal')
plt.title(f'Unit disk in the p-th norm, $p={p}$')

Text(0.5, 1.0, 'Unit disk in the p-th norm, $p=0.5$')


import numpy as np
n = 20
# Hilbert matrix: the entry in row r, column c equals 1 / (r + c + 1).
a = [[1.0 / (row + col + 1) for col in range(n)] for row in range(n)]
A = jnp.array(a)
# Right-hand side of all ones (a random right-hand side is the alternative:
# jax.random.normal(jax.random.PRNGKey(0), (n,))).
rhs = jnp.ones(n)
sol = jnp.linalg.solve(A, rhs)
# Relative residual of the computed solution.
residual = A @ sol - rhs
print(jnp.linalg.norm(residual) / jnp.linalg.norm(rhs))
plt.plot(sol)

1.2877871109130684e-08

[<matplotlib.lines.Line2D at 0x2f927eeb0>]


# Solve the same system again with an all-ones right-hand side and print the
# relative residual (plotting of the solution is switched off here).
rhs = jnp.ones(n)
sol = jnp.linalg.solve(A, rhs)
residual = A @ sol - rhs
print(jnp.linalg.norm(residual) / jnp.linalg.norm(rhs))


# Three ways to evaluate log(1 - tanh(u)^2) for a large argument u.
u = 300
eps = 1e-6
# Direct formula: 1 - tanh(u)^2 is evaluated first, then the logarithm.
one_minus_tanh_sq = 1 - jnp.tanh(u)**2
print("Original function:", jnp.log(one_minus_tanh_sq))
# Shift the argument of the logarithm by a small constant.
eps_add = jnp.log(one_minus_tanh_sq + eps)
print("Attempt to improve stability by adding a small constant:", eps_add)
# Algebraically equivalent form: 1 - tanh(u)^2 = 4 / (exp(u) + exp(-u))^2.
rewritten = jnp.log(4) - 2 * jnp.log(jnp.exp(-u) + jnp.exp(u))
print("Use more numerically stable form:", rewritten)


# Softmax with one very large entry: naive formula vs. max-shifted formula.
n = 5
x = jax.random.normal(jax.random.PRNGKey(0), (n, ))
# Functional in-place update. `jax.ops.index_update` no longer exists in JAX;
# `x.at[...].set(...)` is its replacement and matches the usage earlier in
# this file.
x = x.at[0].set(1000)
# Naive softmax: exp(1000) overflows to inf, so the first entry is inf / inf.
print(jnp.exp(x) / jnp.sum(jnp.exp(x)))
# Subtracting max(x) first makes the largest exponent exp(0) = 1, which
# avoids the overflow without changing the mathematical result.
print(jnp.exp(x - jnp.max(x)) / jnp.sum(jnp.exp(x - jnp.max(x))))

Name	Common Name	Base	Digits	Emin	Emax
binary16	half precision	2	11	-14	+ 15
binary32	single precision	2	24	-126	+ 127
binary64	double precision	2	53	-1022	+1023

Lecture 1: Floating-point arithmetic, vector norms¶

Syllabus¶

Representation of numbers¶

Fixed point¶

Floating point¶

Floating point: formula¶

Fixed vs Floating¶

IEEE 754¶

The two most common formats, single & double¶

Examples¶

Accuracy and memory¶

How does number representation format affect training of neural networks (NN)?¶

bfloat16 (Brain Floating Point)¶

Tensor Float from Nvidia (blog post about this format)¶

Mixed precision (docs from Nvidia)¶

Alternative to the IEEE 754 standard¶

Division accuracy demo¶

Square root accuracy demo¶

Exponent accuracy demo¶

Summary of demos¶

Loss of significance¶

Summation algorithm¶

Naïve algorithm

Kahan summation¶

More complicated example¶

Summary of floating-point¶

Vectors¶

Vector norm¶

Distances and norms¶

Standard norms¶

$p$-norm¶

Equivalence of the norms¶

Computing norms in Python¶

Unit disks in different norms¶

Why can the $L_1$-norm be important?¶

What is a stable algorithm?¶

Classical example¶

More examples of instability¶

Take home message¶

Next lecture¶