Using deep learning
Published

October 13, 2022

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import torch.nn as nn
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
import numpy as np

from shared.step_by_step import StepByStep

from torchviz import make_dot
plt.style.use('fivethirtyeight')
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload

Let’s make up some data:

np.random.seed(43)
a0, a1, a2 = 5., -1.5, +4.
N = 100
x = -2.5 + 5*np.random.rand(N,1)
epsilon = np.random.randn(N,1)
y = a0 + a1*x + a2*x**2 + epsilon
plt.plot(x,y,'.')
plt.show()

# Using sklearn

Somewhat counterintuitively this problem is still linear regression. We just need to first convert x to new_x that basically contains extra features: $$x^0$$, $$x^1$$, and $$x^2$$. This is done via PolynomialFeatures:

poly = PolynomialFeatures(degree=2)
new_x = poly.fit_transform(x)  # for degree 2 we get $[1, x, x^2]$
new_x[:10]
array([[ 1.        , -1.92472717,  3.70457467],
[ 1.        ,  0.5453327 ,  0.29738775],
[ 1.        , -1.83304518,  3.36005463],
[ 1.        , -1.2970519 ,  1.68234363],
[ 1.        , -0.86430472,  0.74702265],
[ 1.        ,  1.79568745,  3.22449344],
[ 1.        ,  0.83045107,  0.68964897],
[ 1.        ,  0.20581106,  0.04235819],
[ 1.        , -2.35493088,  5.54569944],
[ 1.        ,  1.16874148,  1.36595665]])

We now progress as with the linear regression (not that using fit_intercept True or False just affects the way numbers are stored, all calculations are still there):

reg = LinearRegression(fit_intercept=False).fit(new_x, y)
r2_coef = reg.score(new_x, y)
print(reg.coef_, reg.intercept_, r2_coef)
[[ 4.98812164 -1.61954639  4.02342307]] 0.0 0.9831534879311261

## Same but using sklearn.pipeline

One can make the process more streamlined using Pipeline:

model = Pipeline([('poly', PolynomialFeatures(degree=2)),
('linear', LinearRegression(fit_intercept=False))])
# fit to an order-2 polynomial data
model = model.fit(x, y)
print(model.named_steps['linear'].coef_)
print(f'Real values {a0}, {a1}, {a2}')
[[ 4.98812164 -1.61954639  4.02342307]]
Real values 5.0, -1.5, 4.0

# Using PyTorch

Let’s first split data, create Datasets, and DataLoaders:

## Data Preparation

device = 'cuda' if torch.cuda.is_available() else 'cpu'
np.random.seed(42)
N = len(x)
idx = list(range(N))
np.random.shuffle(idx)
split_idx = int(.8*N)
train_idx = idx[:split_idx]
val_idx = idx[split_idx:]
train_x = torch.as_tensor(x[train_idx], device=device).float()
train_y = torch.as_tensor(y[train_idx], device=device).float()
val_x = torch.as_tensor(x[val_idx], device=device).float()
val_y = torch.as_tensor(y[val_idx], device=device).float()
train_dataset = TensorDataset(train_x, train_y)
val_dataset = TensorDataset(val_x, val_y)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

## Training

model=nn.Sequential(
nn.Linear(1,10),
nn.ReLU(),
nn.Linear(10,1)
)
loss_fn = nn.MSELoss()

sbs = StepByStep(model, optimizer, loss_fn)

Let’s train for 200 epoch and plot losses:

sbs.set_loaders(train_loader, val_loader)
sbs.train(200)
sbs.plot_losses()

Let’s make predictions:

test =np.linspace(-4.,4.,num=N).reshape(-1,1)
test_predictions = sbs.predict(test)
plt.plot(x,y,'.')
plt.plot(test,test_predictions,'.')
plt.show()

This is good though, unfortunatelly, the true values of quadratic function are now lost in the sea of weights of the the two linear layers:

sbs.model.state_dict()
OrderedDict([('0.weight',
tensor([[ 1.3475],
[-2.2383],
[-2.1243],
[ 2.0004],
[-1.9875],
[-2.2052],
[ 0.1436],
[-1.8479],
[ 2.6974],
[ 2.1781]])),
('0.bias',
tensor([-1.2300, -3.2117,  0.8249, -1.5303, -0.2013, -2.3025,  1.3949, -0.0182,
0.2817, -3.1922])),
('2.weight',
tensor([[0.7446, 2.5052, 1.1556, 1.2103, 1.3438, 1.6768, 0.8039, 1.2448, 1.4132,
2.6946]])),
('2.bias', tensor([1.5188]))])