Quadratic function fitting

Using deep learning
Published October 13, 2022

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

from shared.step_by_step import StepByStep

from torchviz import make_dot
plt.style.use('fivethirtyeight')

Let’s make up some data:

np.random.seed(43)
a0, a1, a2 = 5., -1.5, +4.
N = 100
x = -2.5 + 5*np.random.rand(N,1)
epsilon = np.random.randn(N,1)
y = a0 + a1*x + a2*x**2 + epsilon
plt.plot(x,y,'.')
plt.show()

Using sklearn

Somewhat counterintuitively, this problem is still linear regression. We just need to first convert x into a new_x that contains the extra features \(x^0\), \(x^1\), and \(x^2\). This is done via PolynomialFeatures:

poly = PolynomialFeatures(degree=2)
new_x = poly.fit_transform(x)  # for degree 2 we get [1, x, x^2]
new_x[:10]
array([[ 1.        , -1.92472717,  3.70457467],
       [ 1.        ,  0.5453327 ,  0.29738775],
       [ 1.        , -1.83304518,  3.36005463],
       [ 1.        , -1.2970519 ,  1.68234363],
       [ 1.        , -0.86430472,  0.74702265],
       [ 1.        ,  1.79568745,  3.22449344],
       [ 1.        ,  0.83045107,  0.68964897],
       [ 1.        ,  0.20581106,  0.04235819],
       [ 1.        , -2.35493088,  5.54569944],
       [ 1.        ,  1.16874148,  1.36595665]])
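To see there is nothing magical here, a quick sketch (assuming the same x as above) builds the same feature matrix by hand and compares it with the PolynomialFeatures output:

manual_x = np.hstack([np.ones_like(x), x, x**2])  # columns: x^0, x^1, x^2
print(np.allclose(manual_x, new_x))  # expected to print True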

We now proceed as with ordinary linear regression (note that setting fit_intercept to True or False only changes where the intercept is stored; the fitted polynomial is the same):

reg = LinearRegression(fit_intercept=False).fit(new_x, y)
r2_coef = reg.score(new_x, y)
print(reg.coef_, reg.intercept_, r2_coef)
[[ 4.98812164 -1.61954639  4.02342307]] 0.0 0.9831534879311261
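As a sanity check (a sketch, not part of the original fit), dropping the constant column and letting LinearRegression handle the intercept itself should give the same polynomial, with a0 showing up in intercept_ instead of coef_:

reg_i = LinearRegression(fit_intercept=True).fit(new_x[:, 1:], y)
print(reg_i.coef_, reg_i.intercept_)  # expected to match the coefficients above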

Same but using sklearn.pipeline

One can make the process more streamlined using Pipeline:

model = Pipeline([('poly', PolynomialFeatures(degree=2)),
                  ('linear', LinearRegression(fit_intercept=False))])
# fit an order-2 polynomial to the data
model = model.fit(x, y)
print(model.named_steps['linear'].coef_)
print(f'Real values {a0}, {a1}, {a2}')
[[ 4.98812164 -1.61954639  4.02342307]]
Real values 5.0, -1.5, 4.0
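Since the pipeline bundles the feature transform and the regression, it can also be used for prediction directly; for example (a small illustration, not in the original):

grid = np.linspace(-2.5, 2.5, num=50).reshape(-1, 1)
plt.plot(x, y, '.')
plt.plot(grid, model.predict(grid), '-')
plt.show()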

Using PyTorch

Let’s first split the data, then create the Datasets and DataLoaders:

Data Preparation

device = 'cuda' if torch.cuda.is_available() else 'cpu'
np.random.seed(42)
N = len(x)
idx = list(range(N))
np.random.shuffle(idx)
split_idx = int(.8*N)
train_idx = idx[:split_idx]
val_idx = idx[split_idx:]
train_x = torch.as_tensor(x[train_idx], device=device).float()
train_y = torch.as_tensor(y[train_idx], device=device).float()
val_x = torch.as_tensor(x[val_idx], device=device).float()
val_y = torch.as_tensor(y[val_idx], device=device).float()
train_dataset = TensorDataset(train_x, train_y)
val_dataset = TensorDataset(val_x, val_y)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
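As a quick sanity check (just an illustration, assuming the setup above), pulling one batch from the training loader confirms the expected shapes:

xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)  # expected: torch.Size([16, 1]) torch.Size([16, 1])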

Training

model = nn.Sequential(
    nn.Linear(1, 10),
    nn.ReLU(),
    nn.Linear(10, 1)
)
optimizer = optim.Adam(model.parameters(), lr=0.1)
loss_fn = nn.MSELoss()

sbs = StepByStep(model, optimizer, loss_fn)

Let’s train for 200 epochs and plot the losses:

sbs.set_loaders(train_loader, val_loader)
sbs.train(200)
sbs.plot_losses()
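StepByStep hides the training loop; roughly, each epoch amounts to the standard PyTorch pattern below (a sketch of the usual logic, assuming model and data live on the same device, not the helper's exact internals):

def train_one_epoch(model, loader, optimizer, loss_fn):
    model.train()
    epoch_loss = 0.0
    for xb, yb in loader:
        optimizer.zero_grad()
        loss = loss_fn(model(xb), yb)  # forward pass and loss
        loss.backward()                # gradients
        optimizer.step()               # parameter update
        epoch_loss += loss.item()
    return epoch_loss / len(loader)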

Let’s make predictions:

test = np.linspace(-4., 4., num=N).reshape(-1, 1)
test_predictions = sbs.predict(test)
plt.plot(x,y,'.')
plt.plot(test,test_predictions,'.')
plt.show()

This looks good, though unfortunately the true coefficients of the quadratic function are now lost in the sea of weights of the two linear layers:

sbs.model.state_dict()
OrderedDict([('0.weight',
              tensor([[ 1.3475],
                      [-2.2383],
                      [-2.1243],
                      [ 2.0004],
                      [-1.9875],
                      [-2.2052],
                      [ 0.1436],
                      [-1.8479],
                      [ 2.6974],
                      [ 2.1781]])),
             ('0.bias',
              tensor([-1.2300, -3.2117,  0.8249, -1.5303, -0.2013, -2.3025,  1.3949, -0.0182,
                       0.2817, -3.1922])),
             ('2.weight',
              tensor([[0.7446, 2.5052, 1.1556, 1.2103, 1.3438, 1.6768, 0.8039, 1.2448, 1.4132,
                       2.6946]])),
             ('2.bias', tensor([1.5188]))])
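That said, the function the network learned can still be summarized by fitting a degree-2 polynomial to its predictions over the training range (a sketch, assuming sbs.predict returns a NumPy array as above):

grid = np.linspace(-2.5, 2.5, num=200).reshape(-1, 1)
preds = sbs.predict(grid)
coeffs = np.polyfit(grid.ravel(), preds.ravel(), deg=2)
print(coeffs)  # highest power first, expected to be roughly [a2, a1, a0]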