PyTorch¶

Why do we use it?¶

  1. Much research in Deep Learning (DL) is implemented in PyTorch
  2. It is a constantly evolving and expanding package (functorch)
  3. The package provides a wide variety of tools to make your code computationally efficient (GPU)

How to initialize vectors, matrices, and tensors (objects with more than two dimensions)?¶

Manual definition¶

In [3]:
import torch

x = torch.tensor([[0,1,0],
                  [1,2,3],
                  [2,4,6]
                  ])
x
Out[3]:
tensor([[0, 1, 0],
        [1, 2, 3],
        [2, 4, 6]])

From numpy¶

In [ ]:
import numpy as np

x = np.random.uniform(size=(3,3))
torch.from_numpy(x)
Out[ ]:
tensor([[0.4860, 0.0246, 0.7205],
        [0.4484, 0.6652, 0.0402],
        [0.9689, 0.5528, 0.7985]], dtype=torch.float64)

Zeros, ones, and ranges¶

Zeros¶

In [ ]:
dims = (4,)
x = torch.zeros(dims)
x
Out[ ]:
tensor([0., 0., 0., 0.])

Ones¶

In [ ]:
dims = (4,)
x = torch.ones(dims)
x
Out[ ]:
tensor([1., 1., 1., 1.])

Range¶

In [ ]:
x = torch.arange(10)
x
Out[ ]:
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Linspace¶

In [ ]:
x = torch.linspace(-1,1,10)
x
Out[ ]:
tensor([-1.0000, -0.7778, -0.5556, -0.3333, -0.1111,  0.1111,  0.3333,  0.5556,
         0.7778,  1.0000])

Matrices¶

Identity matrix¶

In [ ]:
x = torch.eye(3)
x
Out[ ]:
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

Diagonal of matrix¶

In [ ]:
x.diag()
Out[ ]:
tensor([1., 1., 1.])

Diagonal matrix¶

In [ ]:
x = torch.diag(torch.arange(3))
x
Out[ ]:
tensor([[0, 0, 0],
        [0, 1, 0],
        [0, 0, 2]])

Random matrix (uniform distribution)¶

In [ ]:
dims = (3,3)
x = torch.rand(dims)
x
Out[ ]:
tensor([[0.1512, 0.5654, 0.4128],
        [0.4651, 0.1956, 0.7034],
        [0.7692, 0.8898, 0.7750]])

Random matrix (normal distribution)¶

In [ ]:
dims = (3,3)
x = torch.randn(dims)
x
Out[ ]:
tensor([[-0.2327, -0.0362,  0.5722],
        [ 0.9502,  1.0798,  0.4765],
        [-0.0577, -2.3726,  0.5873]])

How to calculate statistics on tensors¶

In [ ]:
x.mean(), x.std(), x.median(), x.min(), x.max()
Out[ ]:
(tensor(0.1074),
 tensor(1.0340),
 tensor(0.4765),
 tensor(-2.3726),
 tensor(1.0798))
In [ ]:
x.mean(dim=1), x.std(dim=1), x.median(dim=1)[0], x.min(dim=1)[0], x.max(dim=1)[0]
Out[ ]:
(tensor([ 0.1011,  0.8355, -0.6144]),
 tensor([0.4197, 0.3176, 1.5565]),
 tensor([-0.0362,  0.9502, -0.0577]),
 tensor([-0.2327,  0.4765, -2.3726]),
 tensor([0.5722, 1.0798, 0.5873]))
In [ ]:
x.median(dim=1)[1], x.min(dim=1)[1], x.max(dim=1)[1]
Out[ ]:
(tensor([1, 0, 0]), tensor([0, 2, 1]), tensor([2, 1, 2]))
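
Note that the `[0]`/`[1]` indexing above works because dimension-wise reductions such as `median`, `min`, and `max` return a `(values, indices)` named tuple; a minimal equivalent sketch:

In [ ]:
values, indices = x.max(dim=1)   # named tuple: maxima and their positions along dim=1
values, indices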

NumPy-style operations in PyTorch¶

Multiplication¶

In [ ]:
x = torch.randn((3,2))
y = torch.randn((2,3))
x @ y
Out[ ]:
tensor([[-0.0930, -1.2068, -0.9818],
        [ 0.2951,  1.0160,  0.2693],
        [-1.1653, -1.0365,  1.9469]])
In [ ]:
x = torch.randn((1,3))
y = torch.randn((3,1))
x
Out[ ]:
tensor([[-0.8451,  1.2279, -1.0831]])
In [ ]:
y
Out[ ]:
tensor([[0.3426],
        [0.9155],
        [0.2896]])
In [ ]:
x @ y
Out[ ]:
tensor([[0.5209]])
In [ ]:
y @ x
Out[ ]:
tensor([[-0.0292,  0.2806,  0.4464],
        [ 0.0248, -0.2386, -0.3797],
        [-0.0213,  0.2045,  0.3254]])
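
The `@` operator above is matrix multiplication; element-wise (Hadamard) multiplication uses `*` and broadcasts as in NumPy. A small added sketch:

In [ ]:
a = torch.arange(6).reshape(2, 3)
b = torch.ones(2, 3)
a * b        # element-wise product, shape (2, 3)
a * 2        # broadcasting with a scalar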

Reshaping and dimension permutation¶

In [ ]:
x = torch.randn((2,3,4))
x
Out[ ]:
tensor([[[ 1.4408,  1.0554, -0.4302,  2.4403],
         [-1.5278, -1.2142,  0.7821,  0.2796],
         [ 0.5764, -0.8881, -2.0423, -0.9508]],

        [[ 0.9360, -0.5286, -0.2148, -0.6397],
         [-0.5315,  0.3589, -0.6161, -1.6595],
         [ 0.2836, -0.8500,  1.2469, -0.2715]]])
In [ ]:
x.reshape(2*3,4)
Out[ ]:
tensor([[ 1.4408,  1.0554, -0.4302,  2.4403],
        [-1.5278, -1.2142,  0.7821,  0.2796],
        [ 0.5764, -0.8881, -2.0423, -0.9508],
        [ 0.9360, -0.5286, -0.2148, -0.6397],
        [-0.5315,  0.3589, -0.6161, -1.6595],
        [ 0.2836, -0.8500,  1.2469, -0.2715]])
In [ ]:
x.reshape(-1,4), x.reshape(-1,4).shape
Out[ ]:
(tensor([[ 1.4408,  1.0554, -0.4302,  2.4403],
         [-1.5278, -1.2142,  0.7821,  0.2796],
         [ 0.5764, -0.8881, -2.0423, -0.9508],
         [ 0.9360, -0.5286, -0.2148, -0.6397],
         [-0.5315,  0.3589, -0.6161, -1.6595],
         [ 0.2836, -0.8500,  1.2469, -0.2715]]), torch.Size([6, 4]))
In [ ]:
x.permute(1,0,2).shape
Out[ ]:
torch.Size([3, 2, 4])
In [ ]:
x.permute(0,2,1).reshape(-1,3).shape
Out[ ]:
torch.Size([8, 3])
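
A related caveat (added note): `view` requires a contiguous memory layout, while `reshape` copies when necessary, so after `permute` either prefer `reshape` or call `.contiguous()` first.

In [ ]:
x.permute(0, 2, 1).reshape(-1, 3).shape               # works: reshape copies if needed
x.permute(0, 2, 1).contiguous().view(-1, 3).shape     # equivalent, with an explicit copy
# x.permute(0, 2, 1).view(-1, 3)                      # would raise: view needs contiguous memory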

Decompositions¶

In [1]:
import torch
In [3]:
x = torch.randn((100,100))
In [6]:
U, S, V = torch.svd(x)
Q, R = torch.linalg.qr(x)
In [15]:
x = torch.randn((100,10)) @ torch.randn((10,100))
In [16]:
%timeit torch.svd(x)
714 µs ± 36.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
In [17]:
%timeit torch.svd_lowrank(x,q=10)
283 µs ± 8.97 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
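
As a sanity check (an addition, using the low-rank x above), the factors can be multiplied back together. Note that `torch.svd` is the older interface and returns V, while the newer `torch.linalg.svd` returns Vᴴ:

In [ ]:
U, S, V = torch.svd(x)
torch.norm(U @ torch.diag(S) @ V.T - x)        # reconstruction error, close to zero

U2, S2, Vh = torch.linalg.svd(x, full_matrices=False)
torch.norm(U2 @ torch.diag(S2) @ Vh - x)       # same check with the newer API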

How to transfer data to the GPU¶

In [ ]:
torch.cuda.is_available()
Out[ ]:
True
In [ ]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
Out[ ]:
device(type='cuda')
In [ ]:
%timeit x = torch.randn((100,100)).to(device)
106 µs ± 2.29 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
In [ ]:
%timeit x = torch.randn((100,100), device=device)
9.96 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
In [ ]:
x = torch.randn((5,5), device=device)
print(x.get_device())
x = x.detach().cpu()
print(x.get_device())
0
-1
In [ ]:
x = torch.randn((100,1000),device="cpu")
y = torch.randn((1000,100),device="cpu")
%timeit x @ y
357 µs ± 5.17 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
In [ ]:
x = torch.randn((100,1000),device="cuda")
y = torch.randn((1000,100),device="cuda")
%timeit x @ y
21.8 µs ± 506 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
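
One caveat (added note): CUDA kernels are launched asynchronously, so `%timeit` on a GPU matmul may mostly measure the kernel launch. Synchronizing before the timer stops gives a fairer comparison; a minimal sketch:

In [ ]:
def matmul_sync(x, y):
    out = x @ y
    torch.cuda.synchronize()   # wait until the GPU kernel has actually finished
    return out

%timeit matmul_sync(x, y)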
In [ ]:
x = torch.randn((100,1000),device="cuda")
y = torch.randn((1000,100),device="cpu")
x @ y
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-18-22e087ccddd4> in <module>
      1 x = torch.randn((100,1000),device="cuda")
      2 y = torch.randn((1000,100),device="cpu")
----> 3 x @ y

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_mm)
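
The fix is to move the operands onto a common device before multiplying, for example (a minimal sketch):

In [ ]:
(x @ y.to(x.device)).shape   # move y to the GPU where x lives
(x.cpu() @ y).shape          # or bring x back to the CPU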

Neural Networks¶

Simple network¶

In [ ]:
net = torch.nn.Linear(100,10)
x = torch.randn((5,100))
x.shape
Out[ ]:
torch.Size([5, 100])
In [ ]:
out = net(x)
out.shape
Out[ ]:
torch.Size([5, 10])
In [ ]:
out_manual = []
for x_one in x:
    out_manual.append(net(x_one[None]))
out_manual = torch.cat(out_manual, dim=0)
In [ ]:
torch.norm(out-out_manual)
Out[ ]:
tensor(7.7970e-07, grad_fn=<CopyBackwards>)
In [ ]:
x_one.shape, x_one[None].shape
Out[ ]:
(torch.Size([100]), torch.Size([1, 100]))
In [ ]:
x_one.unsqueeze(0).shape
Out[ ]:
torch.Size([1, 100])
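
Under the hood, `nn.Linear` stores a weight matrix and a bias and applies the affine map out = x @ Wᵀ + b; a short sketch verifying this for the layer above:

In [ ]:
net.weight.shape, net.bias.shape                       # (torch.Size([10, 100]), torch.Size([10]))
torch.norm(net(x) - (x @ net.weight.T + net.bias))     # close to zero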

More complex networks¶

In [ ]:
net = torch.nn.Sequential(
    torch.nn.Linear(100,50),
    torch.nn.ReLU(),
    torch.nn.Linear(50,10)
)
net
Out[ ]:
Sequential(
  (0): Linear(in_features=100, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=10, bias=True)
)
$$\mathrm{net}(x) = f_2(f_1(f_0(x))), \qquad f_0 = \mathrm{Linear}(100, 50), \quad f_1 = \mathrm{ReLU}, \quad f_2 = \mathrm{Linear}(50, 10)$$
In [ ]:
x = torch.randn((5,100))
In [ ]:
out = net(x)
In [ ]:
out_manual = x
for i in range(len(net)):
    out_manual = net[i](out_manual)
In [ ]:
torch.norm(out - out_manual)
Out[ ]:
tensor(0., grad_fn=<CopyBackwards>)
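
The same network can also be written as a subclass of `torch.nn.Module`, which is the usual pattern once the forward pass is more than a plain chain of layers; a minimal sketch:

In [ ]:
class MLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(100, 50)
        self.fc2 = torch.nn.Linear(50, 10)

    def forward(self, x):
        return self.fc2(torch.relu(self.fc1(x)))

net2 = MLP()
net2(torch.randn(5, 100)).shape   # torch.Size([5, 10])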

How to calculate gradients and Jacobians? (functorch)¶

In [ ]:
net = torch.nn.Sequential(
    torch.nn.Linear(100,10),
    torch.nn.ReLU(),
    torch.nn.Linear(10,1)
).to(device)
In [ ]:
x = torch.randn((1,100), device="cuda")
x_var = torch.autograd.Variable(x,requires_grad=True)
In [ ]:
grads = torch.autograd.grad(outputs=net(x_var), inputs=x_var, grad_outputs=torch.ones_like(net(x)))[0]
In [ ]:
grads.shape
Out[ ]:
torch.Size([1, 100])
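
`torch.autograd.Variable` is kept only for backward compatibility; the same gradient can be obtained with `requires_grad_` and either `autograd.grad` or `backward`. A sketch with the same net and x:

In [ ]:
x_req = x.clone().requires_grad_(True)
out = net(x_req).sum()      # scalar, equivalent to grad_outputs=ones above
out.backward()
x_req.grad.shape            # torch.Size([1, 100])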

Calculating the Jacobian with standard tools¶

In [ ]:
net = torch.nn.Sequential(
    torch.nn.Linear(100,50),
    torch.nn.ReLU(),
    torch.nn.Linear(50,20)
).to(device)
x = torch.randn((1,100), device="cuda")
x_var = torch.autograd.Variable(x,requires_grad=True)
grads = torch.autograd.grad(outputs=net(x_var), inputs=x_var, grad_outputs=torch.ones_like(net(x)))[0]
In [ ]:
grads.shape, net(x).shape
Out[ ]:
(torch.Size([1, 100]), torch.Size([1, 20]))

A single `autograd.grad` call only returns a vector-Jacobian product of shape [1, 100]; the full Jacobian we want has shape [1, 20, 100] (batch, output dimension, input dimension).

In [ ]:
def slow_grad_calc(x):
    grads = []
    for i in range(20):
        x_var = torch.autograd.Variable(x,requires_grad=True)
        out = net(x_var)[:,i]
        grad = torch.autograd.grad(out,x_var,torch.ones_like(out))[0][:,None]
        grads.append(grad)
    return torch.cat(grads,dim=1)
In [ ]:
grads = slow_grad_calc(x)
grads.shape
Out[ ]:
torch.Size([1, 20, 100])
In [ ]:
%timeit slow_grad_calc(x)
9.51 ms ± 152 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

FuncTorch¶

In [ ]:
from functorch import jacfwd, vmap

jac_func = jacfwd(net)
jac_func(x).shape
Out[ ]:
torch.Size([1, 20, 1, 100])
In [ ]:
%timeit jac_func(x)
1.17 ms ± 24.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
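
functorch also provides the reverse-mode counterpart `jacrev`; roughly, `jacfwd` tends to be preferable when a function has few inputs relative to outputs, and `jacrev` in the opposite case. A sketch:

In [ ]:
from functorch import jacrev

jacrev(net)(x).shape   # torch.Size([1, 20, 1, 100]), same Jacobian as with jacfwd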

FuncTorch operations over a batch¶

In [ ]:
x = torch.randn((10,100),device=device)
In [ ]:
grads = jac_func(x)
grads.shape
Out[ ]:
torch.Size([10, 20, 10, 100])
In [ ]:
grads[0,:,0,:]
Out[ ]:
tensor([[-0.0077,  0.0243, -0.0019,  ...,  0.0293, -0.0044,  0.0351],
        [-0.0030,  0.0255,  0.0129,  ...,  0.0025, -0.0403,  0.0171],
        [ 0.0011, -0.0244,  0.0128,  ..., -0.0139,  0.0124,  0.0274],
        ...,
        [-0.0033, -0.0449, -0.0122,  ..., -0.0086,  0.0235, -0.0176],
        [-0.0164, -0.0018, -0.0123,  ...,  0.0323, -0.0443, -0.0130],
        [ 0.0219, -0.0086,  0.0348,  ..., -0.0059, -0.0297,  0.0355]],
       device='cuda:0', grad_fn=<SliceBackward0>)
In [ ]:
grads[0,:,1,:]
Out[ ]:
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0',
       grad_fn=<SliceBackward0>)
In [ ]:
vmap(jac_func)(x.unsqueeze(1)).shape
Out[ ]:
torch.Size([10, 1, 20, 1, 100])
In [ ]:
x.unsqueeze(1).shape
Out[ ]:
torch.Size([10, 1, 100])
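
Since `nn.Linear` also accepts unbatched inputs, one can instead `vmap` directly over the batch dimension and avoid the extra singleton dimensions, getting one 20×100 Jacobian per sample (a sketch):

In [ ]:
vmap(jacfwd(net))(x).shape   # torch.Size([10, 20, 100])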
In [ ]:
# Hypothetical sketch: if the forward pass took an extra argument, e.g.
# net_indexed(x, index), freeze that argument in a lambda so that jacfwd
# differentiates with respect to x only.
def net_indexed(x, index):
    return net(x)[..., index]
i = 0
func = lambda x: net_indexed(x, index=i)
jacfwd(func)