We provide signatures of the functions that you have to implement. Make sure you follow the signatures defined, otherwise your coding solutions will not be graded.
# Your solution is here
1. (10 pts) Consider the following function
$$ F(U, V) = \frac{1}{2}\|X - UV\|_F^2, $$where $X \in \mathbb{R}^{n \times n}$, $U \in \mathbb{R}^{n \times k}$ and $V \in \mathbb{R}^{k \times n}$ and $k < n$.
2. (2 pts) Derive an analytical expression for the gradient of the function $R$:
$$ R(x) = \frac{(Ax, x)}{(x, x)}, $$where $A$ is a symmetric real matrix. You will learn later in the lectures why the gradient of this function is important in NLA. (A small autodiff sanity-check sketch for both items is given below.)
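Neither item asks for code, but if you want to sanity-check the expressions you derive, automatic differentiation is handy. Below is a minimal sketch for the Rayleigh quotient in JAX; the helper my_grad_R is hypothetical (it stands for whatever formula you derive), and the same check works for $F(U, V)$ by differentiating with respect to $U$ or $V$ (e.g. jax.grad(F, argnums=0)).

import jax
import jax.numpy as jnp

# Minimal sanity check (not part of the required derivation):
# compare a hand-derived gradient against automatic differentiation.
def R(x, A):
    return jnp.dot(x, A @ x) / jnp.dot(x, x)

n = 5
B = jax.random.normal(jax.random.PRNGKey(0), (n, n))
A = (B + B.T) / 2                      # symmetric real matrix
x = jax.random.normal(jax.random.PRNGKey(1), (n,))

autodiff_grad = jax.grad(R)(x, A)      # gradient with respect to x
# `my_grad_R` is a hypothetical function returning your analytic expression:
# print(jnp.linalg.norm(autodiff_grad - my_grad_R(x, A)))  # should be ~0
print(autodiff_grad)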
# Your solution is here
In this problem we consider a neural network that classifies a dataset of images. Any neural network can be viewed as a composition of simple linear and non-linear functions. For example, a neural network with 3 layers can be represented as
$$f_3(f_2(f_1(x, w_1), w_2), w_3),$$where $x$ is the input data (in our case, images) and $w_i, \; i =1,\dots,3$ are parameters that are going to be trained.
We will study the compression potential of a neural network with a simple architecture: an alternating sequence of linear and non-linear functions.
The main task in this problem is to study how the compression of fully-connected layers affects the test accuracy. A fully-connected layer is represented by the linear function $AX + B$, where $X$ is the input matrix and $A, B$ are trainable matrices. The matrices $A$ in every layer are going to be compressed. The main result you should obtain is a plot of test accuracy versus the total number of parameters in the neural network.
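To make the compression idea concrete, here is a minimal standalone sketch (not the required solution) of replacing a single weight matrix by a rank-$r$ factorization from the truncated SVD; the sizes m, n and the rank r below are only illustrative. It also shows how the parameter count of the weight matrix drops from $mn$ to $r(m+n)$.

import numpy as np

m, n, r = 1000, 1000, 50
A = np.random.randn(m, n)

# Truncated SVD: keep the r leading singular triplets
U, s, Vt = np.linalg.svd(A, full_matrices=False)
A_left = U[:, :r] * s[:r]      # m x r  (singular values absorbed into one factor)
A_right = Vt[:r, :]            # r x n

# Applying the compressed layer to a batch X costs two thin matmuls
X = np.random.randn(n, 128)
Y = A_left @ (A_right @ X)     # approximates A @ X

print("original params:", m * n, "compressed params:", r * (m + n))

The same idea, applied to every fully-connected layer of the network below, produces the accuracy-versus-parameters trade-off you are asked to plot.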
import torch
import torchvision
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
batch_size = 100
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_loader = torch.utils.data.DataLoader(datasets.CIFAR10('./', train=True, download=True, transform=transform),
                                           batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(datasets.CIFAR10('./', train=False, transform=transform),
                                          batch_size=batch_size, shuffle=True)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.figure(figsize=(20, 10))
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# get some random training images
dataiter = iter(train_loader)
images, labels = next(dataiter)
# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(8)))
dog horse truck plane plane horse horse dog
For simplicity, and to demonstrate the idea of neural network compression, consider an architecture consisting only of fully-connected layers with non-linear ReLU functions between them. To make the compression effect visible, set the dimension of the inner layers to 1000.
Below is an implementation of such a neural network in PyTorch. You will study neural networks in more detail in the Deep Learning course in one of the upcoming terms.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(3 * 32 * 32, 1000)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc3 = nn.Linear(1000, 1000)
        self.fc4 = nn.Linear(1000, 1000)
        self.fc5 = nn.Linear(1000, 1000)
        self.fc6 = nn.Linear(1000, 10)
        self.ReLU = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x.view(-1, 3 * 32 * 32))
        x = self.ReLU(x)
        x = self.fc2(x)
        x = self.ReLU(x)
        x = self.fc3(x)
        x = self.ReLU(x)
        x = self.fc4(x)
        x = self.ReLU(x)
        x = self.fc5(x)
        x = self.ReLU(x)
        x = self.fc6(x)
        return F.log_softmax(x, dim=1)
def train(model, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
log_interval = 50
epochs = 7
model = Net()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
for epoch in range(1, epochs + 1):
    train(model, train_loader, optimizer, epoch)
    test(model, test_loader)
Training log (per-batch losses omitted); per-epoch test results:
Test set: Average loss: 1.5764, Accuracy: 4378/10000 (44%)
Test set: Average loss: 1.4822, Accuracy: 4786/10000 (48%)
Test set: Average loss: 1.4086, Accuracy: 5091/10000 (51%)
Test set: Average loss: 1.4046, Accuracy: 5173/10000 (52%)
Test set: Average loss: 1.3646, Accuracy: 5323/10000 (53%)
Test set: Average loss: 1.3586, Accuracy: 5422/10000 (54%)
Test set: Average loss: 1.3439, Accuracy: 5435/10000 (54%)
Now that the neural network is trained, we are ready to compress the weights in its fully-connected layers.
1. Implement a compressed model in the same manner as the class Net, but with some significant distinctions. It takes as input parameters an instance of the class Net and a compression rank $r > 0$. The model then has to compress every matrix $A$ in the fully-connected layers with the SVD, using the first $r$ singular vectors and singular values. Pay attention to efficient storage of the compressed representation of the layers (see the sketch after this list for one possible factored-layer layout). The forward method of your new model also has to use the compressed representation of the fully-connected layers in the most efficient way; in all other aspects it has to reproduce the forward method of the original non-compressed model (number of layers, activations, loss function, etc.).
2. (5 pts) Plot the dependence of test accuracy on the number of parameters in the compressed model; this number obviously depends on the compression rank $r$. Also plot the dependence of inference time on the compression rank $r$. Explain the obtained results. To measure time, use %timeit with the necessary parameters (examples of using this command can be found in the lectures).
3. (5 pts) After such a transformation, your model depends on the factors obtained from the SVD, so these factors can also be trained with the same gradient method for some number of epochs. This procedure is called fine-tuning. Fine-tune your compressed model for 1 to 5 epochs and compare the resulting test accuracy with the test accuracy obtained right after compression. Explain the observed results.
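For orientation only, here is a minimal sketch of the factored-layer idea, assuming PyTorch's nn.Linear layout (weight of shape (out, in)) and a reasonably recent PyTorch where torch.linalg.svd is available. The class name FactoredLinear and its attributes are placeholders, not a prescribed interface; the point is that only two thin factors are stored and the forward pass applies them one after another, so the full matrix is never re-formed.

import torch
import torch.nn as nn

class FactoredLinear(nn.Module):
    # Hypothetical helper: stores a rank-r factorization of a trained nn.Linear
    def __init__(self, linear, r):
        super().__init__()
        W = linear.weight.data                        # shape (out, in)
        U, s, Vt = torch.linalg.svd(W, full_matrices=False)
        self.left = nn.Parameter(U[:, :r] * s[:r])    # (out, r), singular values absorbed
        self.right = nn.Parameter(Vt[:r, :])          # (r, in)
        self.bias = nn.Parameter(linear.bias.data.clone())

    def forward(self, x):
        # two thin matmuls instead of one dense one
        return (x @ self.right.t()) @ self.left.t() + self.bias

Counting the parameters of the compressed model (e.g. with sum(p.numel() for p in model.parameters())) for several values of $r$ gives the data for the accuracy-versus-parameters plot.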
# Your solution is here
Hermione Granger is well versed in all magical disciplines. In particular, she is a great expert in Numerical Linear Algebra and JAX.
She has invited Harry Potter to play a game.
Hermione chooses a number $r \in [1, 95]$ and two matrices $W_1 \in \mathbb{R}^{r \times 100}$ and $W_2 \in \mathbb{R}^{100 \times r}$. Harry can tell her any 100-dimensional vector $x$, and Hermione gives Harry the result of
$$ \sin(W_2 \cos(W_1 x)),$$where trigonometric functions are applied element-wise. This result is also a 100-dimensional vector.
To avoid losing, Harry should guess which number $r$ Hermione has chosen. Harry knows Python, but he is an absolute layman in algebra. Please help him get at least 95% of the answers right!
Hint 1: SVD might help you, but use it wisely!
Hint 2: Suppose that a special kind of magic allows you to compute gradients through automatic differentiation in JAX. You can also estimate gradients via finite differences if you prefer.
Hint 3: You can estimate the matrix rank using simple heuristics.
You should write your code in the harry_answers function. Good luck! (One possible approach is sketched below.)
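For orientation, one possible (not the only) line of attack consistent with the hints: the map $x \mapsto \sin(W_2 \cos(W_1 x))$ has a Jacobian of rank at most $r$, so the numerical rank of the Jacobian at a random point is a natural estimate of $r$. The helper below is a hypothetical sketch of the rank-estimation step only; the threshold is a heuristic that you will likely have to tune (this is where "use it wisely" comes in).

import jax
import jax.numpy as jnp

def estimate_rank_from_jacobian(f, key, tol=1e-3):
    # f maps a 100-dimensional vector to a 100-dimensional vector
    x = jax.random.normal(key, (100,))
    J = jax.jacfwd(f)(x)                     # 100 x 100 Jacobian via autodiff
    s = jnp.linalg.svd(J, compute_uv=False)  # singular values, descending
    return int(jnp.sum(s > tol * s[0]))      # count "numerically nonzero" ones

Inside harry_answers one could call something like estimate_rank_from_jacobian(self.hermione_computes_function, key) with a fresh PRNG key, possibly averaging the estimate over several random points.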
import jax.numpy as jnp
import jax
import numpy as np
import random
class Game:
    def __init__(self, key):
        # key is a jax.random.PRNGKey
        self.key = key

    def hermione_chooses_r_and_W(self):
        self.r = random.randint(1, 95)
        self.key, subkey = jax.random.split(self.key)
        self.W1 = jax.random.uniform(subkey, (self.r, 100), maxval=100., minval=0., dtype=jnp.float32)
        self.key, subkey = jax.random.split(self.key)
        self.W2 = jax.random.uniform(subkey, (100, self.r), maxval=100., minval=0., dtype=jnp.float32)

    def hermione_computes_function(self, x):
        return jnp.sin(self.W2 @ jnp.cos(self.W1 @ x))

    def harry_answers(self):
        # <your code here>
        # you shouldn't use self.r, self.W1, or self.W2
        # you can call `hermione_computes_function` multiple times
        r = 1  # for example
        return r

    def play(self, n_rounds, verbose=True):
        # n_rounds: a number of rounds of the game
        # verbose: print or not the result of each round
        n_right_answers = 0
        for _ in range(n_rounds):
            self.hermione_chooses_r_and_W()
            r = self.harry_answers()
            if abs(r - self.r) == 0:
                if verbose:
                    print("Good job! The true answer is {}, Harry's answer is {}!".format(self.r, r))
                n_right_answers += 1
            else:
                if verbose:
                    print("Harry's answer is {}, but the true answer is {} :(".format(r, self.r))
        if float(n_right_answers) / n_rounds > 0.95:
            print('Well done: {}/{} right answers!'.format(n_right_answers, n_rounds))
        else:
            print('Only {}/{} right answers :( Work a little more and you will succeed!'.format(
                n_right_answers, n_rounds))
key = jax.random.PRNGKey(713)
game = Game(key)
game.play(n_rounds=100, verbose=False)
Only 1/100 right answers :( Work a little more and you will succeed!
A norm is called absolute if $\|x\|=\| \lvert x \rvert \|$ holds for any vector $x$, where $x=(x_1,\dots,x_n)^T$ and $\lvert x \rvert = (\lvert x_1 \rvert,\dots, \lvert x_n \rvert)^T$. Give an example of a norm which is not absolute.
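For orientation (this is not the requested example): all the usual $p$-norms are absolute, because they depend on the entries only through their absolute values, e.g.
$$ \| \lvert x \rvert \|_1 = \sum_{i=1}^n \bigl|\, \lvert x_i \rvert \,\bigr| = \sum_{i=1}^n \lvert x_i \rvert = \|x\|_1. $$So a norm that fails this property must be sensitive to the signs of the components, not only to their magnitudes.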
Write a function ranks_HOSVD(A, eps) that calculates the Tucker ranks of a $d$-dimensional tensor $A$ using the Higher-Order SVD (HOSVD) algorithm, where eps is the relative accuracy in the Frobenius norm between the approximated and the initial tensors. Details can be found here on Figure 4.3.
def ranks_HOSVD(A, eps):
    return r  # r should be a tuple of ranks r = (r1, r2, ..., rd)
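A minimal sketch of one standard way to choose the ranks (a possible approach using the usual HOSVD error-splitting rule, not necessarily identical to the referenced Figure 4.3): take the SVD of every mode-$k$ unfolding and keep the smallest rank whose discarded singular values fit into the per-mode budget $\varepsilon \|A\|_F / \sqrt{d}$.

import numpy as np

def ranks_HOSVD_sketch(A, eps):
    # Sketch: per-mode truncation with error budget eps * ||A||_F / sqrt(d),
    # so that the total HOSVD error stays within eps * ||A||_F.
    d = A.ndim
    delta = eps * np.linalg.norm(A) / np.sqrt(d)
    ranks = []
    for k in range(d):
        # mode-k unfolding: bring axis k to the front and flatten the rest
        unfolding = np.moveaxis(A, k, 0).reshape(A.shape[k], -1)
        s = np.linalg.svd(unfolding, compute_uv=False)
        tail = np.sqrt(np.cumsum(s[::-1] ** 2))[::-1]  # tail[r] = ||s[r:]||_2
        r = 1
        while r < len(s) and tail[r] > delta:
            r += 1
        ranks.append(r)
    return tuple(ranks)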