CNN - Convolutional Neural Network
This is a tutorial on image classification using a CNN (convolutional neural network)
- Defining the datasets and dataloaders
- Function to plot the image
- We can also create a grid of images in order to understand the dataset
- Steps of a CNN
- Train vs Validation loss
- Prediction on the test images
- Prediction on the entire test set
![]()
In this blog we will be using the CIFAR10 dataset, which consists of 60000 32x32 px colour images in 10 classes. Our objective is to achieve the best possible accuracy.
The dataset has a total of 60000 images belonging to the following classes: 'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'.
import torch
import os
import torchvision
import torch.nn as nn
from torchvision.datasets import MNIST
from torchvision.datasets.utils import download_url
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torchvision.utils import make_grid
import tarfile
import yaml
import numpy as np
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 150

# CIFAR10 train and test sets, downloaded into ./data if necessary.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Splitting the training data into training and validation subsets.
# The global RNG is seeded first so the split is the same on every run.
random_seed = 42
torch.manual_seed(random_seed)
train_dataset, val_dataset = random_split(train_dataset, [45000, 5000])

# Defining the train / validation / test dataloaders.
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
val_loader = DataLoader(
    val_dataset, batch_size=batch_size * 2, shuffle=True
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

# Class names, indexed by the integer label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
def imshow(image):
    """Display an image tensor with matplotlib.

    Args:
        image: Either a channel-first image tensor (C, H, W), such as the
            output of ``make_grid``, or an ``(image, label)`` pair as
            returned by indexing a dataset. The label is ignored.
    """
    # Dataset samples arrive as (image, label); grids are bare tensors.
    # Unpacking unconditionally would fail on a bare 3-channel tensor.
    img = image[0] if isinstance(image, (tuple, list)) else image
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channel-last (H, W, C) data.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Fetch one batch from the training loader. The built-in next() is used
# because loader iterators follow the iterator protocol and do not
# reliably provide a .next() method.
dataiter = iter(train_loader)
images, labels = next(dataiter)
# show images: arrange the batch into a grid with 10 images per row
fig, ax = plt.subplots(figsize=(20, 20))
grid = torchvision.utils.make_grid(images, nrow=10)
# The grid is channel-first; matplotlib wants channel-last (H, W, C).
ax.imshow(grid.permute(1, 2, 0))
plt.show()
# Show a single training image.
plt.imshow(train_dataset[30][0].permute(1, 2, 0));
# Rebuild the train / validation loaders with a batch size of 100.
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=100, shuffle=True)

# Stand-alone layers, used here only to see how each stage changes the
# shape of a batch.
layer1 = nn.Conv2d(3, 62, 5)
pooling = nn.MaxPool2d(2, 2)
layer2 = nn.Conv2d(62, 128, 5)
layer3 = nn.Conv2d(128, 128, 5)

# Push the first batch through conv -> pool -> conv -> pool, printing the
# shape after every stage, then stop.
for batch, label in train_loader:
    print(batch.shape, label.shape)
    dta = batch
    for stage in (layer1, pooling, layer2, pooling):
        dta = stage(dta)
        print(dta.shape)
    break
class convnet(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    Two conv + ReLU + max-pool stages are followed by three fully connected
    layers. The first linear layer expects 128 * 5 * 5 features, which is
    what the conv stack yields for 3 x 32 x 32 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 62, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(62, 128, 5)
        self.linear1 = nn.Linear(128 * 5 * 5, 140)
        self.linear2 = nn.Linear(140, 80)
        self.linear3 = nn.Linear(80, 10)

    def forward(self, x):
        """Return raw (unnormalised) class scores of shape (n, 10)."""
        x = self.pool(F.relu(self.conv1(x)))  # -> n, 62, 14, 14
        x = self.pool(F.relu(self.conv2(x)))  # -> n, 128, 5, 5
        x = x.view(-1, 128 * 5 * 5)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        # No activation on the last layer: cross_entropy expects raw logits,
        # and a ReLU here would clamp every negative score to zero.
        return self.linear3(x)
def metric_acc(outputs, labels):
    """Return the fraction of rows whose top-scoring class equals the label.

    The result is wrapped in a 0-dim tensor so callers can stack it.
    """
    predicted = torch.max(outputs, dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))
def results_epoch(out_lst):
    """Average the per-batch validation loss and accuracy.

    Args:
        out_lst: List of dicts, each holding tensors under 'val_loss' and
            'val_acc'.

    Returns:
        Dict with the mean 'val_loss' and mean 'val_acc' as Python floats.
    """
    summary = {}
    for key in ('val_loss', 'val_acc'):
        per_batch = [dct[key] for dct in out_lst]
        summary[key] = torch.stack(per_batch).mean().item()
    return summary
def final_output(dct, epoch):
    """Print one summary line with the epoch's train loss, val loss and val accuracy."""
    template = "Epoch [{}], train_loss:{:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
    line = template.format(
        epoch,
        dct['train_loss'],
        dct['val_loss'],
        dct['val_acc'],
    )
    print(line)
def validation(batch_val, model):
    """Compute loss and accuracy of ``model`` on one validation batch.

    Args:
        batch_val: An ``(images, labels)`` pair from a dataloader.
        model: The network to evaluate.

    Returns:
        Dict with 'val_loss' (cross-entropy, tensor) and 'val_acc'
        (accuracy, tensor) for this batch.
    """
    img_val, label_val = batch_val
    img_val = img_val.to(device)
    label_val = label_val.to(device)
    # Evaluation needs no gradients. Without no_grad the returned loss
    # keeps its autograd graph alive for as long as the caller holds it.
    with torch.no_grad():
        pred_val = model(img_val)
        # loss is computed
        loss_val = F.cross_entropy(pred_val, label_val)
    # Accuracy is computed
    acc_val = metric_acc(pred_val, label_val)
    return {'val_loss': loss_val, 'val_acc': acc_val}
def check_scores(val_loader, model):
    """Evaluate ``model`` on every batch of ``val_loader`` and return the averaged metrics."""
    per_batch = []
    for batch_val in val_loader:
        per_batch.append(validation(batch_val, model))
    return results_epoch(per_batch)
def fit(epochs, learning_rate, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` and evaluate it on the validation set after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate handed to the optimizer.
        model: The network to train (updated in place).
        train_loader: Dataloader yielding ``(images, labels)`` training batches.
        val_loader: Dataloader used for the per-epoch validation.
        opt_func: Optimizer class. The default SGD is built with momentum 0.9;
            any other optimizer is built with the learning rate only.

    Returns:
        List with one dict per epoch holding 'val_loss', 'val_acc' and
        'train_loss' (mean training loss of that epoch).
    """
    # Honour opt_func. Momentum is passed only to SGD because not every
    # optimizer accepts that argument.
    if opt_func is torch.optim.SGD:
        optimizer = opt_func(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = opt_func(model.parameters(), lr=learning_rate)
    out_lst = []
    for epoch in range(epochs):
        # Collected per epoch, so the reported train loss describes this
        # epoch only rather than a running mean over all epochs so far.
        loss_train = []
        # Training step on the training dataloader
        for img, label in train_loader:
            img = img.to(device)
            label = label.to(device)
            # Forward pass: class scores for the batch
            pred = model(img)
            # Multi-class classification, so the loss is cross entropy
            loss = F.cross_entropy(pred, label)
            # Store a detached copy; only the value is needed for reporting
            loss_train.append(loss.detach())
            # Gradient of the loss with respect to the model parameters
            loss.backward()
            # Update the weights
            optimizer.step()
            # Clear the gradients so they do not accumulate into the next batch
            optimizer.zero_grad()
        # Validation on the validation dataloader
        output = check_scores(val_loader, model)
        if loss_train:
            output['train_loss'] = torch.stack(loss_train).mean().item()
        else:
            # No training batches: there is no loss to average.
            output['train_loss'] = float('nan')
        out_lst.append(output)
        final_output(output, epoch)
    return out_lst
# Build the network and move it to the selected device.
model = convnet().to(device)

# Validation metrics of the untrained network, as a baseline.
starting = check_scores(val_loader, model)
print(starting)

# Train for 5 epochs.
out_lst = fit(
    5,
    learning_rate=0.003,
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
)
def plot_accuracy(out_lst):
    """Plot the validation accuracy recorded for each epoch."""
    val_acc_per_epoch = [entry['val_acc'] for entry in out_lst]
    plt.plot(val_acc_per_epoch)
    plt.title('epoch vs accuracy_validation_set')
    plt.xlabel('epoch')
    plt.ylabel('accuracy_validation_set')


plot_accuracy(out_lst)
def overfit_check(out_lst):
    """Plot validation and training loss per epoch on the same axes."""
    # Plot order matches the legend order below.
    for key in ('val_loss', 'train_loss'):
        plt.plot([entry[key] for entry in out_lst])
    plt.title('check_overfiting_train_val_loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['val_loss', 'train_loss'])


overfit_check(out_lst)
def pred_function(img, model, classes):
    """Predict the class of a single image.

    Args:
        img: One image tensor without a batch dimension.
        model: The network used for the prediction.
        classes: Sequence mapping a class index to its name.

    Returns:
        Tuple ``(class_index, class_name)`` of the highest-scoring class.
    """
    # The model works on batches, so add a batch dimension of size 1.
    batch = img.unsqueeze(0).to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(batch)
    _, pred = torch.max(output, dim=1)
    # Convert to a plain int before indexing ``classes``.
    idx = pred.item()
    return idx, classes[idx]
# Show two test images and compare the true class with the prediction.
for sample_idx in (2, 200):
    img, label = test_dataset[sample_idx]
    imshow(test_dataset[sample_idx])
    pred = pred_function(img, model, classes)
    # 'Label' is the ground-truth class from the dataset, not the
    # predicted index (pred[0]).
    print('Label:', classes[label], ', Predicted:', pred[1])
We can see that the accuracy on the entire test set is 68%, which is not good.
In the next blog we will see how we can achieve state-of-the-art results using a ResNet architecture.
# Evaluate the trained model on the full test set; the result holds the
# loss and accuracy averaged over the test batches.
check_scores(test_loader,model)