We provide the signatures of the functions that you have to implement. Make sure you follow them; otherwise, your coding solutions will not be graded.
Please submit a single Jupyter Notebook file in which only Python and Markdown/$\LaTeX$ are used. Hand-written solutions inserted as photos or in any other way are prohibited and will not be graded. If you have any questions about using Markdown, ask them!
1.
2.
Hint: $\exp(S^{-1}AS) = \sum\limits_{k=0}^{\infty}\frac{(S^{-1}AS)^k}{k!} = S^{-1}\exp(A)S$; moreover, $\exp(D) = \mathrm{diag}(\exp(d_1), \exp(d_2), \ldots, \exp(d_n))$, where $D = \mathrm{diag}(d_1, d_2, \ldots, d_n)$ is a diagonal matrix with diagonal entries $d_1, d_2, \ldots, d_n$. (A quick numerical check of this identity is sketched right after this list.)
3.
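As an aside, the identity from the hint is easy to verify numerically. Below is a quick illustrative check, assuming SciPy is available for the matrix exponential; it is not part of the required solution.

# Quick numerical check of exp(S^{-1} A S) = S^{-1} exp(A) S (illustrative only; assumes SciPy)
import numpy as np
from scipy.linalg import expm

rng = np.random.default_rng(0)
n = 4
A = rng.standard_normal((n, n))
S = rng.standard_normal((n, n))      # a random S is invertible with probability 1
S_inv = np.linalg.inv(S)

lhs = expm(S_inv @ A @ S)            # exp(S^{-1} A S)
rhs = S_inv @ expm(A) @ S            # S^{-1} exp(A) S
print(np.allclose(lhs, rhs))         # expected: True (up to round-off)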
# Your solution is here
1. (11 pts) Consider the following function
$$ F(U, V) = \frac{1}{2}\|X - UDV\|_F^2, $$
where $X \in \mathbb{R}^{n \times n}$, $U \in \mathbb{R}^{n \times k}$, $V \in \mathbb{R}^{k \times n}$, $k < n$, and $D = \mathrm{diag}(d_1, \ldots, d_k)$ is a given diagonal matrix.
2. (4 pts) Derive an analytical expression for the gradient and the Hessian of the function $R$:
$$ R(x) = \frac{(Ax, x)}{(x, x)}, $$
where $A$ is a symmetric real matrix. You will learn later in the lectures why the gradient of this function is important in NLA.
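Whatever closed-form expressions you derive above, they are easy to sanity-check numerically. Below is a minimal finite-difference gradient check on the Rayleigh quotient; it is illustrative only, and `numerical_grad` is an ad hoc helper, not part of the assignment.

# Minimal sketch: compare an analytical gradient with central finite differences (ad hoc helper)
import numpy as np

def numerical_grad(f, x, eps=1e-6):
    g = np.zeros_like(x)
    for i in range(x.size):
        e = np.zeros_like(x)
        e[i] = eps
        g[i] = (f(x + e) - f(x - e)) / (2 * eps)
    return g

rng = np.random.default_rng(1)
n = 5
A = rng.standard_normal((n, n))
A = (A + A.T) / 2                          # symmetric matrix, as in the Rayleigh quotient task
R = lambda x: (A @ x) @ x / (x @ x)

x0 = rng.standard_normal(n)
print(numerical_grad(R, x0))               # compare with your derived gradient evaluated at x0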
# Your solution is here
In this problem we consider a neural network that classifies a dataset of images. Any neural network can be viewed as a composition of simple linear and non-linear functions. For example, a neural network with 3 layers can be represented as
$$f_3(f_2(f_1(x, w_1), w_2), w_3),$$
where $x$ is the input data (in our case, images) and $w_i, \; i = 1,\dots,3$ are the parameters to be trained.

We will study the compression potential of a neural network with a simple architecture: an alternation of a number of linear and non-linear functions.

The main task in this problem is to study how the compression of fully-connected layers affects the test accuracy. Any fully-connected layer is represented as a linear function $AX + B$, where $X$ is the input matrix and $A, B$ are trainable matrices. The matrices $A$ in every layer are going to be compressed. The main result you should obtain is a plot of the test accuracy versus the total number of parameters in the neural network; a small sketch of the underlying rank-$r$ compression and its parameter count is given right below.
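To make the compression idea concrete, here is a quick illustrative sketch (with ad hoc names; not the required implementation) of replacing a dense weight matrix $A$ by its best rank-$r$ approximation from the truncated SVD, and of the resulting parameter count.

# Illustrative sketch: rank-r truncated SVD of a dense weight matrix (ad hoc example)
import numpy as np

n, m, r = 1000, 1000, 50                       # layer sizes and an example compression rank
A = np.random.randn(n, m)

U, s, Vt = np.linalg.svd(A, full_matrices=False)
U_r, s_r, Vt_r = U[:, :r], s[:r], Vt[:r, :]    # keep the first r singular vectors/values
A_r = U_r @ np.diag(s_r) @ Vt_r                # best rank-r approximation of A (Eckart--Young)

# Parameter count: n*m numbers for the dense matrix vs. r*(n + m + 1) for the stored factors
print(n * m, r * (n + m + 1))

# Efficient application: never form A_r explicitly, apply the factors one by one
x = np.random.randn(m)
y = U_r @ (s_r * (Vt_r @ x))                   # equals A_r @ x up to round-off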
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
batch_size = 100
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_loader = torch.utils.data.DataLoader(datasets.CIFAR10('./', train=True, download=True, transform=transform),
                                           batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(datasets.CIFAR10('./', train=False, transform=transform),
                                          batch_size=batch_size, shuffle=True)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
Files already downloaded and verified
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.figure(figsize=(20, 10))
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# get some random training images
dataiter = iter(train_loader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(8)))
frog plane dog cat frog deer dog cat
For simplicity, and to demonstrate the idea of neural network compression, consider an architecture consisting only of fully-connected layers with non-linear ReLU functions between them. To make the compression effect visible, set the dimension of the inner layers to 1000.
Below you see an implementation of such a neural network in PyTorch. You will study neural networks in more detail in the Deep Learning course in one of the upcoming terms.
class ResidualBlock(nn.Module):
    def __init__(self, in_features, middle_features, activation=nn.ReLU()):
        super().__init__()
        self.layer1 = nn.Linear(in_features, middle_features)
        self.layer2 = nn.Linear(middle_features, in_features)
        self.activation = activation

    def forward(self, x):
        out = self.layer1(x)
        out = self.activation(out)
        out = self.layer2(out)
        out = out + x
        out = self.activation(out)
        return out
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(3 * 32 * 32, 1000)
        self.res1 = ResidualBlock(1000, 1000)
        self.res2 = ResidualBlock(1000, 1000)
        self.fc2 = nn.Linear(1000, 10)
        self.ReLU = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x.view(-1, 3 * 32 * 32))
        x = self.ReLU(x)
        x = self.res1(x)
        x = self.res2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
def train(model, train_loader, optimizer, epoch):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.train()
    model.to(device)
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test(model, test_loader):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)   # make sure the model and the data are on the same device
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
log_interval = 50
epochs = 7

model = Net()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(1, epochs + 1):
    train(model, train_loader, optimizer, epoch)
    test(model, test_loader)
Train Epoch: 1 [0/50000 (0%)] Loss: 2.324131 Train Epoch: 1 [5000/50000 (10%)] Loss: 1.751483 Train Epoch: 1 [10000/50000 (20%)] Loss: 1.567799 Train Epoch: 1 [15000/50000 (30%)] Loss: 1.660581 Train Epoch: 1 [20000/50000 (40%)] Loss: 1.531731 Train Epoch: 1 [25000/50000 (50%)] Loss: 1.592496 Train Epoch: 1 [30000/50000 (60%)] Loss: 1.671944 Train Epoch: 1 [35000/50000 (70%)] Loss: 1.381795 Train Epoch: 1 [40000/50000 (80%)] Loss: 1.463287 Train Epoch: 1 [45000/50000 (90%)] Loss: 1.673219
Test set: Average loss: 1.4851, Accuracy: 4772/10000 (48%)
Train Epoch: 2 [0/50000 (0%)] Loss: 1.170636 Train Epoch: 2 [5000/50000 (10%)] Loss: 1.359391 Train Epoch: 2 [10000/50000 (20%)] Loss: 1.563428 Train Epoch: 2 [15000/50000 (30%)] Loss: 1.337223 Train Epoch: 2 [20000/50000 (40%)] Loss: 1.377386 Train Epoch: 2 [25000/50000 (50%)] Loss: 1.385065 Train Epoch: 2 [30000/50000 (60%)] Loss: 1.359587 Train Epoch: 2 [35000/50000 (70%)] Loss: 1.294609 Train Epoch: 2 [40000/50000 (80%)] Loss: 1.692260 Train Epoch: 2 [45000/50000 (90%)] Loss: 1.325166
Test set: Average loss: 1.4136, Accuracy: 5059/10000 (51%)
Train Epoch: 3 [0/50000 (0%)] Loss: 1.359571 Train Epoch: 3 [5000/50000 (10%)] Loss: 1.244511 Train Epoch: 3 [10000/50000 (20%)] Loss: 1.127326 Train Epoch: 3 [15000/50000 (30%)] Loss: 1.145635 Train Epoch: 3 [20000/50000 (40%)] Loss: 1.234360 Train Epoch: 3 [25000/50000 (50%)] Loss: 1.113500 Train Epoch: 3 [30000/50000 (60%)] Loss: 1.355322 Train Epoch: 3 [35000/50000 (70%)] Loss: 1.549211 Train Epoch: 3 [40000/50000 (80%)] Loss: 1.377020 Train Epoch: 3 [45000/50000 (90%)] Loss: 1.103844
Test set: Average loss: 1.3749, Accuracy: 5239/10000 (52%)
Train Epoch: 4 [0/50000 (0%)] Loss: 1.193062 Train Epoch: 4 [5000/50000 (10%)] Loss: 1.273741 Train Epoch: 4 [10000/50000 (20%)] Loss: 1.404070 Train Epoch: 4 [15000/50000 (30%)] Loss: 1.080328 Train Epoch: 4 [20000/50000 (40%)] Loss: 1.224075 Train Epoch: 4 [25000/50000 (50%)] Loss: 1.333977 Train Epoch: 4 [30000/50000 (60%)] Loss: 1.072122 Train Epoch: 4 [35000/50000 (70%)] Loss: 1.021575 Train Epoch: 4 [40000/50000 (80%)] Loss: 1.187461 Train Epoch: 4 [45000/50000 (90%)] Loss: 1.171425
Test set: Average loss: 1.3591, Accuracy: 5292/10000 (53%)
Train Epoch: 5 [0/50000 (0%)] Loss: 1.168614 Train Epoch: 5 [5000/50000 (10%)] Loss: 1.028332 Train Epoch: 5 [10000/50000 (20%)] Loss: 0.973166 Train Epoch: 5 [15000/50000 (30%)] Loss: 1.170292 Train Epoch: 5 [20000/50000 (40%)] Loss: 1.056305 Train Epoch: 5 [25000/50000 (50%)] Loss: 0.999553 Train Epoch: 5 [30000/50000 (60%)] Loss: 0.904085 Train Epoch: 5 [35000/50000 (70%)] Loss: 1.236696 Train Epoch: 5 [40000/50000 (80%)] Loss: 1.060990 Train Epoch: 5 [45000/50000 (90%)] Loss: 1.029037
Test set: Average loss: 1.3808, Accuracy: 5349/10000 (53%)
Train Epoch: 6 [0/50000 (0%)] Loss: 0.827618 Train Epoch: 6 [5000/50000 (10%)] Loss: 0.812197 Train Epoch: 6 [10000/50000 (20%)] Loss: 0.885723 Train Epoch: 6 [15000/50000 (30%)] Loss: 0.901153 Train Epoch: 6 [20000/50000 (40%)] Loss: 0.758302 Train Epoch: 6 [25000/50000 (50%)] Loss: 0.850228 Train Epoch: 6 [30000/50000 (60%)] Loss: 1.227192 Train Epoch: 6 [35000/50000 (70%)] Loss: 0.877823 Train Epoch: 6 [40000/50000 (80%)] Loss: 1.058005 Train Epoch: 6 [45000/50000 (90%)] Loss: 1.079689
Test set: Average loss: 1.4376, Accuracy: 5333/10000 (53%)
Train Epoch: 7 [0/50000 (0%)] Loss: 0.879806 Train Epoch: 7 [5000/50000 (10%)] Loss: 0.626541 Train Epoch: 7 [10000/50000 (20%)] Loss: 0.762323 Train Epoch: 7 [15000/50000 (30%)] Loss: 0.807961 Train Epoch: 7 [20000/50000 (40%)] Loss: 0.981910 Train Epoch: 7 [25000/50000 (50%)] Loss: 1.056234 Train Epoch: 7 [30000/50000 (60%)] Loss: 0.776608 Train Epoch: 7 [35000/50000 (70%)] Loss: 0.904691 Train Epoch: 7 [40000/50000 (80%)] Loss: 1.145846 Train Epoch: 7 [45000/50000 (90%)] Loss: 0.839523
Test set: Average loss: 1.4711, Accuracy: 5333/10000 (53%)
Now we have a (somehow) trained neural network, and we are ready to compress the weights in its fully-connected layers.

1. Implement a compressed counterpart of the original model; it is similar to the class `Net`, but with some significant distinctions. It takes as input parameters an instance of the class `Net` and a compression rank $r > 0$. After that, this model has to compress all matrices $A$ in the fully-connected layers with the SVD, using the first $r$ singular vectors and singular values. Pay attention to storing the compressed representation of the layers efficiently. Also, the `forward` method of your new residual block has to use the compressed representation of the fully-connected layers (inside it) in the most efficient way. In all other aspects it has to reproduce the `forward` method of the original non-compressed model (number of layers, activations, loss function, etc.). A minimal sketch of such a low-rank layer is given after the hints below.
2. (5 pts) Plot the dependence of the test accuracy on the number of parameters in the compressed model. This number of parameters obviously depends on the compression rank $r$. Also plot the dependence of the inference time on the compression rank $r$. Explain the obtained results. To measure time, use %timeit with the necessary parameters (see the lectures for examples of using this command).
3. (5 pts) After such transformations, your model depends on the factors obtained from the SVD. Therefore, these factors can also be trained with the same gradient method for some number of epochs. This procedure is called fine-tuning. We ask you to fine-tune your compressed model for 1 to 5 epochs and compare the resulting test accuracy with the test accuracy you get right after compression. Explain the observed results.
Hint 1. Please check that the factors are indeed updated during fine-tuning. If for some reason they are not updated, check that their parameter `requires_grad` is set to `True`.

Hint 2. You can use the class `torch.nn.Parameter` to convert the factors into parameters of the network.
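For illustration only, here is a minimal sketch (with ad hoc names, assuming a PyTorch version that provides torch.linalg.svd; it is not the required implementation) of a low-rank replacement of nn.Linear that stores the truncated SVD factors as trainable parameters and never forms the full weight matrix.

# Minimal sketch of a low-rank replacement for nn.Linear (ad hoc names, illustrative only)
class LowRankLinear(nn.Module):
    def __init__(self, linear, r):
        # linear: a trained nn.Linear layer; r: compression rank
        super().__init__()
        U, s, Vh = torch.linalg.svd(linear.weight.data, full_matrices=False)
        # store the truncated factors as nn.Parameter, so that fine-tuning can update them
        self.U = nn.Parameter(U[:, :r] * s[:r])        # (out_features, r), singular values absorbed
        self.V = nn.Parameter(Vh[:r, :])               # (r, in_features)
        self.bias = nn.Parameter(linear.bias.data.clone())

    def forward(self, x):
        # apply the factors one after another: O(r(n_in + n_out)) work instead of O(n_in n_out)
        return (x @ self.V.t()) @ self.U.t() + self.bias

Such a layer stores $r(n_\mathrm{in} + n_\mathrm{out}) + n_\mathrm{out}$ numbers instead of $n_\mathrm{in} n_\mathrm{out} + n_\mathrm{out}$, which is what enters the accuracy-versus-parameters plot; inference time can be measured, for example, with %timeit applied to a forward pass on a fixed batch.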
# Your solution is here
Hermione Granger is well versed in all magical disciplines. In particular, she is a great expert in Numerical Linear Algebra and JAX.
She has invited Harry Potter to play a game.

Hermione chooses a number $r \in [1, 95]$ and two matrices $W_1 \in \mathbb{R}^{r \times 100}$ and $W_2 \in \mathbb{R}^{100 \times r}$. Harry can tell her any 100-dimensional vector $x$, and Hermione gives Harry the result of
$$ \|\sin(W_2 \cos(W_1 x))\|^2_2, $$
where the trigonometric functions are applied element-wise. The result is the squared norm of a 100-dimensional vector.

To win, Harry has to guess which number $r$ Hermione has chosen. Harry knows Python, but he is an absolute layman in algebra. Please help him get at least 95% of the answers correct!
Hint 1: SVD might help you, but use it wisely!
Hint 2: Suppose that a special magic allows you to compute gradients through automatic differentiation in JAX. You can also estimate gradients via finite differences if you prefer.
Hint 3: You can estimate the matrix rank using simple heuristics.
You should write your code in the `harry_answers` function. Good luck!
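As a self-contained illustration of Hints 1-3 (on a toy function with ad hoc names, not a ready-made solution for the Game class), one can stack gradients of a black-box scalar function at several random points and count the significant singular values of the resulting matrix:

# Toy illustration of Hints 1-3 (ad hoc names): gradients of g(x) = ||sin(W2 cos(W1 x))||^2
# lie in the row space of W1, so the numerical rank of a matrix of stacked gradients reveals r.
import jax
import jax.numpy as jnp

toy_key = jax.random.PRNGKey(0)
k1, k2, k3 = jax.random.split(toy_key, 3)
toy_r = 7
toy_W1 = jax.random.uniform(k1, (toy_r, 100))
toy_W2 = jax.random.uniform(k2, (100, toy_r))

def toy_g(x):
    return jnp.sum(jnp.square(jnp.sin(toy_W2 @ jnp.cos(toy_W1 @ x))))

xs = jax.random.normal(k3, (100, 100))            # random query points (at least as many as the largest possible rank)
grads = jax.vmap(jax.grad(toy_g))(xs)             # (100, 100) matrix of stacked gradients
sigma = jnp.linalg.svd(grads, compute_uv=False)   # singular values in decreasing order
print(int(jnp.sum(sigma > 1e-3 * sigma[0])))      # heuristic numerical rank; expected: toy_r

The relative threshold used here is just one simple heuristic for the numerical rank.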
import jax.numpy as jnp
import jax
import numpy as np
import random
class Game:
    def __init__(self, key):
        # key is a jax.random.PRNGKey
        self.key = key

    def hermione_chooses_r_and_W(self):
        self.r = random.randint(1, 95)
        self.key, subkey = jax.random.split(self.key)
        self.W1 = jax.random.uniform(subkey, (self.r, 100), maxval=100., minval=0., dtype=jnp.float32)
        self.key, subkey = jax.random.split(self.key)
        self.W2 = jax.random.uniform(subkey, (100, self.r), maxval=100., minval=0., dtype=jnp.float32)

    def hermione_computes_function(self, x):
        return jnp.sum(jnp.square(jnp.sin(self.W2 @ jnp.cos(self.W1 @ x))))

    def harry_answers(self):
        # <your code here>
        # you shouldn't use self.r, self.W1, or self.W2
        # you can call `hermione_computes_function` multiple times
        r = 1  # for example
        return r

    def play(self, n_rounds, verbose=True):
        # n_rounds: a number of rounds of the game
        # verbose: print or not the result of each round
        n_right_answers = 0
        for _ in range(n_rounds):
            self.hermione_chooses_r_and_W()
            r = self.harry_answers()
            if r == self.r:
                if verbose:
                    print("Good job! The true answer is {}, Harry's answer is {}!".format(self.r, r))
                n_right_answers += 1
            else:
                if verbose:
                    print("Harry's answer is {}, but the true answer is {} :(".format(r, self.r))
        if float(n_right_answers) / n_rounds > 0.95:
            print('Well done: {}/{} right answers!'.format(n_right_answers, n_rounds))
        else:
            print('Only {}/{} right answers :( Work a little more and you will succeed!'.format(
                n_right_answers, n_rounds))
key = jax.random.PRNGKey(713)
game = Game(key)
game.play(n_rounds=100, verbose=False)
Only 1/100 right answers :( Work a little more and you will succeed!