This notebook is an implementation of gradient descent using PyTorch.

Gradient descent is the foundation of all deep learning problems, so it is critical to understand it.

import numpy as np
import torch
import pandas as pd
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import  torch.nn as nn
import torch.nn.functional as F
# Build a 10x6 float32 feature matrix: values 58.0 up to 382.5 in steps of 5.5
inp = np.arange(start=58, stop=387, step=5.5, dtype=np.float32).reshape(10, 6)
inp

array([[ 58. ,  63.5,  69. ,  74.5,  80. ,  85.5],
       [ 91. ,  96.5, 102. , 107.5, 113. , 118.5],
       [124. , 129.5, 135. , 140.5, 146. , 151.5],
       [157. , 162.5, 168. , 173.5, 179. , 184.5],
       [190. , 195.5, 201. , 206.5, 212. , 217.5],
       [223. , 228.5, 234. , 239.5, 245. , 250.5],
       [256. , 261.5, 267. , 272.5, 278. , 283.5],
       [289. , 294.5, 300. , 305.5, 311. , 316.5],
       [322. , 327.5, 333. , 338.5, 344. , 349.5],
       [355. , 360.5, 366. , 371.5, 377. , 382.5]], dtype=float32)
# Confirm the dtype (float32) and shape (10 rows x 6 feature columns)
inp.dtype, inp.shape

(dtype('float32'), (10, 6))
# Build the 10x1 float32 target column: 6000.0 upward in steps of 320.7
actual = np.arange(start=6000, stop=9000, step=320.7, dtype=np.float32).reshape(10, 1)
actual

array([[6000.    ],
       [6320.7   ],
       [6641.4004],
       [6962.1006],
       [7282.801 ],
       [7603.501 ],
       [7924.201 ],
       [8244.901 ],
       [8565.602 ],
       [8886.302 ]], dtype=float32)
# Confirm the dtype (float32) and shape (10 targets, one per input row)
actual.dtype, actual.shape

(dtype('float32'), (10, 1))
# Wrap the numpy arrays as torch tensors (from_numpy shares the underlying memory)
actual = torch.from_numpy(actual)
inp = torch.from_numpy(inp)

Convert the tensors into a Dataset and a DataLoader

# Pair each input row with its target so they can be indexed together
ds = TensorDataset(inp, actual)

# A train/validation split could be made with random_split, e.g.:
# tr_dataset, val_dataset = random_split(ds, [8, 2])

# The DataLoader shuffles the dataset and serves it in batches
# (batch_size defaults to 1 here, so each batch is a single row)
train_loader = DataLoader(ds, shuffle=True)

# Data types of the dataset and the loader
type(ds), type(train_loader)
(torch.utils.data.dataset.TensorDataset,
 torch.utils.data.dataloader.DataLoader)

A DataLoader always yields a tuple of the training data together with its label.

# Iterate once over the loader to see what it yields: a (features, target) pair per batch
for xb, yb in train_loader:
    print(xb)
    print(yb)

tensor([[190.0000, 195.5000, 201.0000, 206.5000, 212.0000, 217.5000]])
tensor([[7282.8008]])
tensor([[322.0000, 327.5000, 333.0000, 338.5000, 344.0000, 349.5000]])
tensor([[8565.6016]])
tensor([[ 91.0000,  96.5000, 102.0000, 107.5000, 113.0000, 118.5000]])
tensor([[6320.7002]])
tensor([[256.0000, 261.5000, 267.0000, 272.5000, 278.0000, 283.5000]])
tensor([[7924.2012]])
tensor([[124.0000, 129.5000, 135.0000, 140.5000, 146.0000, 151.5000]])
tensor([[6641.4004]])
tensor([[289.0000, 294.5000, 300.0000, 305.5000, 311.0000, 316.5000]])
tensor([[8244.9014]])
tensor([[58.0000, 63.5000, 69.0000, 74.5000, 80.0000, 85.5000]])
tensor([[6000.]])
tensor([[355.0000, 360.5000, 366.0000, 371.5000, 377.0000, 382.5000]])
tensor([[8886.3018]])
tensor([[157.0000, 162.5000, 168.0000, 173.5000, 179.0000, 184.5000]])
tensor([[6962.1006]])
tensor([[223.0000, 228.5000, 234.0000, 239.5000, 245.0000, 250.5000]])
tensor([[7603.5010]])

Create a model using nn.Linear and pass the input through it to produce the output.

# One input feature per column of the tabular data; a single regression output
input_size = 6
output_size = 1

# nn.Linear holds a randomly initialised (1, 6) weight matrix and a (1,) bias
model = nn.Linear(input_size, output_size)

# Inspect the learnable weight and bias tensors
list(model.parameters())

# Forward pass on the full input; predictions match the targets' (10, 1) shape
predictions = model(inp)
actual.shape, predictions.shape
(torch.Size([10, 1]), torch.Size([10, 1]))

Calculate the loss

# Mean-squared error between the untrained predictions and the targets
loss = F.mse_loss(predictions, actual)

# SGD updates the model's weight and bias in-place on each step;
# the tiny learning rate keeps the large-valued inputs from diverging
opt = torch.optim.SGD(model.parameters(), lr=1e-6)

opt

SGD (
Parameter Group 0
    dampening: 0
    lr: 1e-06
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

Run gradient descent to find the optimal weights and bias

# Standard training loop: forward pass, loss, backward pass, parameter update
num_epochs = 2000
for epoch in range(num_epochs):
    for xb, yb in train_loader:
        # forward pass and loss for this (shuffled) single-row batch
        loss = F.mse_loss(model(xb), yb)
        # backpropagate gradients into the model parameters
        loss.backward()
        # apply one SGD update, then clear the accumulated gradients
        opt.step()
        opt.zero_grad()
    # report the loss of the epoch's last batch every 30 epochs
    if epoch % 30 == 0:
        print(loss)

tensor(1276.5966, grad_fn=<MseLossBackward0>)
tensor(2668.6709, grad_fn=<MseLossBackward0>)
tensor(7169.9878, grad_fn=<MseLossBackward0>)
tensor(469.6278, grad_fn=<MseLossBackward0>)
tensor(6568.8335, grad_fn=<MseLossBackward0>)
tensor(1190.0479, grad_fn=<MseLossBackward0>)
tensor(325.2668, grad_fn=<MseLossBackward0>)
tensor(5114.6245, grad_fn=<MseLossBackward0>)
tensor(1722.8174, grad_fn=<MseLossBackward0>)
tensor(1077.5576, grad_fn=<MseLossBackward0>)
tensor(160.6259, grad_fn=<MseLossBackward0>)
tensor(375.2014, grad_fn=<MseLossBackward0>)
tensor(4391.7803, grad_fn=<MseLossBackward0>)
tensor(2078.6121, grad_fn=<MseLossBackward0>)
tensor(2570.0146, grad_fn=<MseLossBackward0>)
tensor(725.0766, grad_fn=<MseLossBackward0>)
tensor(2081.1060, grad_fn=<MseLossBackward0>)
tensor(873.6527, grad_fn=<MseLossBackward0>)
tensor(83.2389, grad_fn=<MseLossBackward0>)
tensor(1916.8406, grad_fn=<MseLossBackward0>)
tensor(719.1458, grad_fn=<MseLossBackward0>)
tensor(1902.9702, grad_fn=<MseLossBackward0>)
tensor(42.3262, grad_fn=<MseLossBackward0>)
tensor(763.7343, grad_fn=<MseLossBackward0>)
tensor(365.8590, grad_fn=<MseLossBackward0>)
tensor(112.0207, grad_fn=<MseLossBackward0>)
tensor(925.1934, grad_fn=<MseLossBackward0>)
tensor(158.8609, grad_fn=<MseLossBackward0>)
tensor(363.2858, grad_fn=<MseLossBackward0>)
tensor(0.4345, grad_fn=<MseLossBackward0>)
tensor(453.9729, grad_fn=<MseLossBackward0>)
tensor(123.0280, grad_fn=<MseLossBackward0>)
tensor(30.7137, grad_fn=<MseLossBackward0>)
tensor(350.9402, grad_fn=<MseLossBackward0>)
tensor(89.2324, grad_fn=<MseLossBackward0>)
tensor(296.6872, grad_fn=<MseLossBackward0>)
tensor(19.0595, grad_fn=<MseLossBackward0>)
tensor(20.8788, grad_fn=<MseLossBackward0>)
tensor(83.6939, grad_fn=<MseLossBackward0>)
tensor(173.1255, grad_fn=<MseLossBackward0>)
tensor(155.1168, grad_fn=<MseLossBackward0>)
tensor(125.8494, grad_fn=<MseLossBackward0>)
tensor(179.1650, grad_fn=<MseLossBackward0>)
tensor(92.3213, grad_fn=<MseLossBackward0>)
tensor(23.1508, grad_fn=<MseLossBackward0>)
tensor(34.4067, grad_fn=<MseLossBackward0>)
tensor(54.5636, grad_fn=<MseLossBackward0>)
tensor(18.6440, grad_fn=<MseLossBackward0>)
tensor(145.8339, grad_fn=<MseLossBackward0>)
tensor(47.0510, grad_fn=<MseLossBackward0>)
tensor(21.9315, grad_fn=<MseLossBackward0>)
tensor(59.0601, grad_fn=<MseLossBackward0>)
tensor(5.9200, grad_fn=<MseLossBackward0>)
tensor(27.8658, grad_fn=<MseLossBackward0>)
tensor(23.2401, grad_fn=<MseLossBackward0>)
tensor(2.5090, grad_fn=<MseLossBackward0>)
tensor(5.5459, grad_fn=<MseLossBackward0>)
tensor(12.6879, grad_fn=<MseLossBackward0>)
tensor(32.2145, grad_fn=<MseLossBackward0>)
tensor(31.1918, grad_fn=<MseLossBackward0>)
tensor(1.8559, grad_fn=<MseLossBackward0>)
tensor(14.7257, grad_fn=<MseLossBackward0>)
tensor(8.5545, grad_fn=<MseLossBackward0>)
tensor(6.6442, grad_fn=<MseLossBackward0>)
tensor(18.0003, grad_fn=<MseLossBackward0>)
tensor(11.6892, grad_fn=<MseLossBackward0>)
tensor(1.4676, grad_fn=<MseLossBackward0>)

Now we can compare the model's results after the weights have been updated by gradient descent.

We can see that we are quite close to the actual value

# Predictions from the trained model — compare against the actual targets below
model(inp)

tensor([[5996.6123],
        [6318.0566],
        [6639.5049],
        [6960.9502],
        [7282.3916],
        [7603.8350],
        [7925.2881],
        [8246.7295],
        [8568.1748],
        [8889.6162]], grad_fn=<AddmmBackward0>)
# The ground-truth target values, for comparison with the predictions above
actual

tensor([[6000.0000],
        [6320.7002],
        [6641.4004],
        [6962.1006],
        [7282.8008],
        [7603.5010],
        [7924.2012],
        [8244.9014],
        [8565.6016],
        [8886.3018]])