import torch
x = torch.tensor([[0,1,0],
                  [1,2,3],
                  [2,4,6]])
x
tensor([[0, 1, 0],
        [1, 2, 3],
        [2, 4, 6]])
import numpy as np
x = np.random.uniform(size=(3,3))
torch.from_numpy(x)
tensor([[0.4860, 0.0246, 0.7205],
        [0.4484, 0.6652, 0.0402],
        [0.9689, 0.5528, 0.7985]], dtype=torch.float64)
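One thing to keep in mind: torch.from_numpy shares memory with the source array rather than copying it, so in-place changes on either side are visible in both. A quick sketch:
a = np.zeros(3)
t = torch.from_numpy(a)
a[0] = 1.0
t  # tensor([1., 0., 0.], dtype=torch.float64) -- the NumPy write is visible here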
dims = (4,)  # note the trailing comma: (4) is just the int 4, not a tuple
x = torch.zeros(dims)
x
tensor([0., 0., 0., 0.])
dims = (4,)
x = torch.ones(dims)
x
tensor([1., 1., 1., 1.])
x = torch.arange(10)
x
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x = torch.linspace(-1,1,10)
x
tensor([-1.0000, -0.7778, -0.5556, -0.3333, -0.1111, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000])
x = torch.eye(3)
x
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])
x.diag()
tensor([1., 1., 1.])
x = torch.diag(torch.arange(3))
x
tensor([[0, 0, 0],
        [0, 1, 0],
        [0, 0, 2]])
dims = (3,3)
x = torch.rand(dims)
x
tensor([[0.1512, 0.5654, 0.4128],
        [0.4651, 0.1956, 0.7034],
        [0.7692, 0.8898, 0.7750]])
dims = (3,3)
x = torch.randn(dims)
x
tensor([[-0.2327, -0.0362,  0.5722],
        [ 0.9502,  1.0798,  0.4765],
        [-0.0577, -2.3726,  0.5873]])
x.mean(), x.std(), x.median(), x.min(), x.max()
(tensor(0.1074), tensor(1.0340), tensor(0.4765), tensor(-2.3726), tensor(1.0798))
x.mean(dim=1), x.std(dim=1), x.median(dim=1)[0], x.min(dim=1)[0], x.max(dim=1)[0]
(tensor([ 0.1011,  0.8355, -0.6144]),
 tensor([0.4197, 0.3176, 1.5565]),
 tensor([-0.0362,  0.9502, -0.0577]),
 tensor([-0.2327,  0.4765, -2.3726]),
 tensor([0.5722, 1.0798, 0.5873]))
x.median(dim=1)[1], x.min(dim=1)[1], x.max(dim=1)[1]
(tensor([1, 0, 0]), tensor([0, 2, 1]), tensor([2, 1, 2]))
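The dim-wise median/min/max above actually return (values, indices) named tuples, so the [0]/[1] indexing can be replaced with named fields:
result = x.max(dim=1)
result.values, result.indices  # identical to result[0], result[1]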
x = torch.randn((3,2))
y = torch.randn((2,3))
x @ y
tensor([[-0.0930, -1.2068, -0.9818],
        [ 0.2951,  1.0160,  0.2693],
        [-1.1653, -1.0365,  1.9469]])
x = torch.randn((1,3))
y = torch.randn((3,1))
x
tensor([[-0.8451, 1.2279, -1.0831]])
y
tensor([[0.3426],
        [0.9155],
        [0.2896]])
x @ y
tensor([[0.5209]])
y @ x
tensor([[-0.0292,  0.2806,  0.4464],
        [ 0.0248, -0.2386, -0.3797],
        [-0.0213,  0.2045,  0.3254]])
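For completeness, the same inner and outer products can be spelled with torch.dot and torch.outer on the flattened vectors; a small sketch:
torch.dot(x.flatten(), y.flatten())    # the scalar from x @ y above
torch.outer(y.flatten(), x.flatten())  # the (3,3) matrix from y @ x above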
x = torch.randn((2,3,4))
x
tensor([[[ 1.4408,  1.0554, -0.4302,  2.4403],
         [-1.5278, -1.2142,  0.7821,  0.2796],
         [ 0.5764, -0.8881, -2.0423, -0.9508]],

        [[ 0.9360, -0.5286, -0.2148, -0.6397],
         [-0.5315,  0.3589, -0.6161, -1.6595],
         [ 0.2836, -0.8500,  1.2469, -0.2715]]])
x.reshape(2*3,4)
tensor([[ 1.4408,  1.0554, -0.4302,  2.4403],
        [-1.5278, -1.2142,  0.7821,  0.2796],
        [ 0.5764, -0.8881, -2.0423, -0.9508],
        [ 0.9360, -0.5286, -0.2148, -0.6397],
        [-0.5315,  0.3589, -0.6161, -1.6595],
        [ 0.2836, -0.8500,  1.2469, -0.2715]])
x.reshape(-1,4), x.reshape(-1,4).shape
(tensor([[ 1.4408,  1.0554, -0.4302,  2.4403],
         [-1.5278, -1.2142,  0.7821,  0.2796],
         [ 0.5764, -0.8881, -2.0423, -0.9508],
         [ 0.9360, -0.5286, -0.2148, -0.6397],
         [-0.5315,  0.3589, -0.6161, -1.6595],
         [ 0.2836, -0.8500,  1.2469, -0.2715]]),
 torch.Size([6, 4]))
x.permute(1,0,2).shape
torch.Size([3, 2, 4])
x.permute(0,2,1).reshape(-1,3).shape
torch.Size([8, 3])
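A caveat behind the permute-then-reshape pattern: permute only rearranges strides, so the result is non-contiguous and .view would fail on it, while .reshape copies silently when it has to. A sketch:
y = x.permute(0, 2, 1)
y.is_contiguous()                 # False -- only the strides changed
y.contiguous().view(-1, 3).shape  # torch.Size([8, 3]); .reshape(-1, 3) does this copy for you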
import torch
x = torch.randn((100,100))
U, S, V = torch.svd(x)  # torch.svd is deprecated; torch.linalg.svd is the current API
Q, R = torch.linalg.qr(x)
x = torch.randn((100,10)) @ torch.randn((10,100))
%timeit torch.svd(x)
714 µs ± 36.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit torch.svd_lowrank(x,q=10)
283 µs ± 8.97 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
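Since this x is a product of 100x10 and 10x100 factors, its rank is at most 10, so the rank-10 truncated SVD should reconstruct it essentially exactly; a quick check:
U, S, V = torch.svd_lowrank(x, q=10)
torch.norm(x - U @ torch.diag(S) @ V.T)  # near zero, up to float precision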
import torch
torch.cuda.is_available()
True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
device(type='cuda')
%timeit x = torch.randn((100,100)).to(device)
106 µs ± 2.29 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit x = torch.randn((100,100), device=device)
9.96 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
x = torch.randn((5,5), device=device)
print(x.get_device())
x = x.detach().cpu()
print(x.get_device())
0
-1
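get_device() returns -1 for CPU tensors; the .device attribute is the more readable check:
x.device  # device(type='cpu') after the .cpu() call above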
x = torch.randn((100,1000),device="cpu")
y = torch.randn((1000,100),device="cpu")
%timeit x @ y
357 µs ± 5.17 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
x = torch.randn((100,1000),device="cuda")
y = torch.randn((1000,100),device="cuda")
%timeit x @ y
21.8 µs ± 506 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
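One caveat on these GPU numbers: CUDA kernels launch asynchronously, so %timeit can end up measuring mostly launch overhead. Synchronizing inside the timed statement gives a more honest figure:
%timeit x @ y; torch.cuda.synchronize()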
x = torch.randn((100,1000),device="cuda")
y = torch.randn((1000,100),device="cpu")
x @ y
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-18-22e087ccddd4> in <module>
      1 x = torch.randn((100,1000),device="cuda")
      2 y = torch.randn((1000,100),device="cpu")
----> 3 x @ y

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_mm)
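The fix is simply to move both operands onto the same device before multiplying:
(x @ y.to("cuda")).shape  # torch.Size([100, 100])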
import torch
net = torch.nn.Linear(100,10)
x = torch.randn((5,100))
x.shape
torch.Size([5, 100])
out = net(x)
out.shape
torch.Size([5, 10])
out_manual = []
for x_one in x:
    out_manual.append(net(x_one[None]))
out_manual = torch.cat(out_manual, dim=0)
torch.norm(out-out_manual)
tensor(7.7970e-07, grad_fn=<CopyBackwards>)
x_one.shape, x_one[None].shape
(torch.Size([100]), torch.Size([1, 100]))
x_one.unsqueeze(0).shape
torch.Size([1, 100])
net = torch.nn.Sequential(
    torch.nn.Linear(100,50),
    torch.nn.ReLU(),
    torch.nn.Linear(50,10)
)
net
Sequential(
  (0): Linear(in_features=100, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=10, bias=True)
)
x = torch.randn((5,100))
out = net(x)
out_manual = x
for i in range(len(net)):
    out_manual = net[i](out_manual)
torch.norm(out - out_manual)
tensor(0., grad_fn=<CopyBackwards>)
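Indexing into the Sequential also exposes each layer's parameters; note that Linear stores its weight as (out_features, in_features):
net[0].weight.shape, net[0].bias.shape  # (torch.Size([50, 100]), torch.Size([50]))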
!pip install functorch
Collecting functorch
  Downloading functorch-1.13.0-py2.py3-none-any.whl (2.1 kB)
Collecting torch<1.13.1,>=1.13.0
  Downloading torch-1.13.0-cp37-cp37m-win_amd64.whl (167.3 MB)
Requirement already satisfied: typing-extensions in c:\users\egor\anaconda3\lib\site-packages (from torch<1.13.1,>=1.13.0->functorch) (4.4.0)
Installing collected packages: torch, functorch
  Attempting uninstall: torch
    Found existing installation: torch 1.10.1+cu102
    Uninstalling torch-1.10.1+cu102:
      Successfully uninstalled torch-1.10.1+cu102
Successfully installed functorch-1.13.0 torch-1.13.0
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.11.2+cu102 requires torch==1.10.1, but you have torch 1.13.0 which is incompatible.
torchaudio 0.10.1+cu102 requires torch==1.10.1, but you have torch 1.13.0 which is incompatible.
import torch
from functorch import jacrev,jacfwd,vmap
device = "cuda"
net = torch.nn.Sequential(
    torch.nn.Linear(100,10),
    torch.nn.ReLU(),
    torch.nn.Linear(10,1)
).to(device)
x = torch.randn((1,100), device="cuda")
x_var = x.clone().detach().requires_grad_(True)  # torch.autograd.Variable is deprecated; requires_grad_ is the modern way
grads = torch.autograd.grad(outputs=net(x_var), inputs=x_var, grad_outputs=torch.ones_like(net(x)))[0]
grads.shape
torch.Size([1, 100])
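When the output is reduced to a scalar, grad_outputs is unnecessary and .backward() fills in .grad directly; a minimal sketch:
x_var = x.clone().detach().requires_grad_(True)
net(x_var).sum().backward()
x_var.grad.shape  # torch.Size([1, 100])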
net = torch.nn.Sequential(
    torch.nn.Linear(100,50),
    torch.nn.ReLU(),
    torch.nn.Linear(50,20)
).to(device)
x = torch.randn((1,100), device="cuda")
x_var = x.clone().detach().requires_grad_(True)
grads = torch.autograd.grad(outputs=net(x_var), inputs=x_var, grad_outputs=torch.ones_like(net(x)))[0]
grads.shape, net(x).shape
(torch.Size([1, 100]), torch.Size([1, 20]))
The grads above are only a vector-Jacobian product (the 20 per-output gradients summed together); the full Jacobian we actually want has shape [1, 20, 100].
def slow_grad_calc(x):
    # one autograd.grad call per output component: 20 backward passes
    grads = []
    for i in range(20):
        x_var = x.clone().detach().requires_grad_(True)
        out = net(x_var)[:, i]
        grad = torch.autograd.grad(out, x_var, torch.ones_like(out))[0][:, None]
        grads.append(grad)
    return torch.cat(grads, dim=1)
grads = slow_grad_calc(x)
grads.shape
torch.Size([1, 20, 100])
%timeit slow_grad_calc(x)
9.51 ms ± 152 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
jac_func = jacfwd(net)
jac_func(x).shape
torch.Size([1, 20, 1, 100])
%timeit jac_func(x)
1.17 ms ± 24.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
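functorch also provides jacrev (reverse mode), which tends to win when a function has fewer outputs than inputs, as here (20 vs 100); worth timing both on your own model:
jac_func_rev = jacrev(net)
jac_func_rev(x).shape  # torch.Size([1, 20, 1, 100]) -- the same Jacobian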
x = torch.randn((10,100),device=device)
grads = jac_func(x)
grads.shape
torch.Size([10, 20, 10, 100])
grads[0,:,0,:]
tensor([[-0.0077,  0.0243, -0.0019,  ...,  0.0293, -0.0044,  0.0351],
        [-0.0030,  0.0255,  0.0129,  ...,  0.0025, -0.0403,  0.0171],
        [ 0.0011, -0.0244,  0.0128,  ..., -0.0139,  0.0124,  0.0274],
        ...,
        [-0.0033, -0.0449, -0.0122,  ..., -0.0086,  0.0235, -0.0176],
        [-0.0164, -0.0018, -0.0123,  ...,  0.0323, -0.0443, -0.0130],
        [ 0.0219, -0.0086,  0.0348,  ..., -0.0059, -0.0297,  0.0355]],
       device='cuda:0', grad_fn=<SliceBackward0>)
grads[0,:,1,:]
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0',
       grad_fn=<SliceBackward0>)
vmap(jac_func)(x.unsqueeze(1)).shape
torch.Size([10, 1, 20, 1, 100])
x.unsqueeze(1).shape
torch.Size([10, 1, 100])
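The two singleton dimensions come from the per-sample batch of size 1 and can be squeezed away, leaving one (20, 100) Jacobian per sample:
vmap(jac_func)(x.unsqueeze(1)).squeeze(3).squeeze(1).shape  # torch.Size([10, 20, 100])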
# A plain nn.Sequential takes only the input tensor, so net(x, index=0)
# raises a TypeError. To differentiate a function of x that needs extra
# arguments (here: picking out output component i), bind them in a lambda:
i = 0
func = lambda x: net(x)[:, i]
jac_func = jacfwd(func)