# Torchvision, Conversions, Samplers

On conversions and transforms
Published

November 3, 2022

Here we’ll take a problem of binary image classifing.

Code
``````from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from shared.step_by_step import StepByStep
import platform
from PIL import Image
import datetime
import matplotlib.pyplot as plt
from matplotlib import cm
from torch.utils.data import DataLoader, Dataset, random_split, WeightedRandomSampler, SubsetRandomSampler
from torchvision.transforms import Compose, ToTensor, Normalize, ToPILImage, RandomHorizontalFlip, Resize

plt.style.use('fivethirtyeight')``````
Code
``````def show_image(im, cmap=None):
fig = plt.figure(figsize=(3,3))
plt.imshow(im, cmap=cmap)
plt.grid(False)
plt.show()

def gen_img(start, target, fill=1, img_size=10):
# Generates empty image
img = np.zeros((img_size, img_size), dtype=float)

start_row, start_col = None, None

if start > 0:
start_row = start
else:
start_col = np.abs(start)

if target == 0:
if start_row is None:
img[:, start_col] = fill
else:
img[start_row, :] = fill
else:
if start_col == 0:
start_col = 1

if target == 1:
if start_row is not None:
up = (range(start_row, -1, -1),
range(0, start_row + 1))
else:
up = (range(img_size - 1, start_col - 1, -1),
range(start_col, img_size))
img[up] = fill
else:
if start_row is not None:
down = (range(start_row, img_size, 1),
range(0, img_size - start_row))
else:
down = (range(0, img_size - 1 - start_col + 1),
range(start_col, img_size))
img[down] = fill

return 255 * img.reshape(1, img_size, img_size)

def generate_dataset(img_size=10, n_images=100, binary=True, seed=17):
np.random.seed(seed)

starts = np.random.randint(-(img_size - 1), img_size, size=(n_images,))
targets = np.random.randint(0, 3, size=(n_images,))

images = np.array([gen_img(s, t, img_size=img_size)
for s, t in zip(starts, targets)], dtype=np.uint8)

if binary:
targets = (targets > 0).astype(int)

return images, targets

def plot_images(images, targets, n_plot=30):
n_rows = n_plot // 6 + ((n_plot % 6) > 0)
fig, axes = plt.subplots(n_rows, 6, figsize=(9, 1.5 * n_rows))
axes = np.atleast_2d(axes)

for i, (image, target) in enumerate(zip(images[:n_plot], targets[:n_plot])):
row, col = i // 6, i % 6
ax = axes[row, col]
ax.set_title('#{} - Label:{}'.format(i, target), {'size': 12})
# plot filter channel in grayscale
ax.imshow(image.squeeze(), cmap='gray', vmin=0, vmax=1)

for ax in axes.flat:
ax.set_xticks([])
ax.set_yticks([])
ax.label_outer()

plt.tight_layout()
return fig

class TransformedTensorDataset(Dataset):
def __init__(self, x, y, transform=None):
self.x = x
self.y = y
self.transform = transform

def __getitem__(self, index):
x = self.x[index]

if self.transform:
x = self.transform(x)

return x, self.y[index]

def __len__(self):
return len(self.x)``````
Code
``````images, labels = generate_dataset(img_size=5, n_images=300, binary=True, seed=13)
x_tensor = torch.as_tensor(images / 255.).float()
y_tensor = torch.as_tensor(labels.reshape(-1, 1)).float()  # reshaped this to (N,1) tensor
torch.manual_seed(13)
N = len(x_tensor)
n_train = int(.8*N)
n_val = N - n_train
train_subset, val_subset = random_split(x_tensor, [n_train, n_val])
train_idx = train_subset.indices
val_idx = val_subset.indices

#|code-fold: true
#|output: false
lr = 0.1

# Now we can create a model
model_deeper = nn.Sequential()

# Defines a SGD optimizer to update the parameters
optimizer_deeper = optim.SGD(model_deeper.parameters(), lr=lr)

# Defines a binary cross entropy loss function
binary_loss_fn = nn.BCELoss()
sbs_deeper = StepByStep(model_deeper, optimizer_deeper, binary_loss_fn)``````

## Note on NCHW vs NHWC formats

The format for images is different between libraries: - PyTorch uses NCHW - TensorFlow uses NHWC - PIL/Matplotlib images are HWC.

``````image_r  = np.zeros((5, 5), dtype=np.uint8)
image_r[:, 0] = 255
image_r[:, 1] = 128

image_g = np.zeros((5, 5), dtype=np.uint8)
image_g[:, 1] = 128
image_g[:, 2] = 255
image_g[:, 3] = 128

image_b = np.zeros((5, 5), dtype=np.uint8)
image_b[:, 3] = 128
image_b[:, 4] = 255``````
Code
``````show_image(image_r, 'gray')
zeros = np.zeros((5,5), dtype=np.uint8)
stacked_red = np.zeros((5,5,3), dtype=np.uint8)  # the format is HxWxX (height X width X channel)
stacked_red[:,:,0] = image_r
# also same thing can be achieved stacked_red = np.stack([image_r, zeros, zeros], axis=2)
show_image(stacked_red)``````
Code
``````def image_channels(red, green, blue, rgb, gray, rows=(0, 1, 2)):
fig, axs = plt.subplots(len(rows), 4, figsize=(15, 5.5))

zeros = np.zeros((5, 5), dtype=np.uint8)

titles1 = ['Red', 'Green', 'Blue', 'Grayscale Image']
titles0 = ['image_r', 'image_g', 'image_b', 'image_gray']
titles2 = ['as first channel', 'as second channel', 'as third channel', 'RGB Image']

idx0 = np.argmax(np.array(rows) == 0)
idx1 = np.argmax(np.array(rows) == 1)
idx2 = np.argmax(np.array(rows) == 2)

for i, m in enumerate([red, green, blue, gray]):
if 0 in rows:
axs[idx0, i].axis('off')
axs[idx0, i].invert_yaxis()
if (1 in rows) or (i < 3):
axs[idx0, i].text(0.15, 0.25, str(m.astype(np.uint8)), verticalalignment='top')
axs[idx0, i].set_title(titles0[i], fontsize=16)

if 1 in rows:
axs[idx1, i].set_title(titles1[i], fontsize=16)
axs[idx1, i].set_xlabel('5x5', fontsize=14)
axs[idx1, i].imshow(m, cmap=plt.cm.gray)

if 2 in rows:
axs[idx2, i].set_title(titles2[i], fontsize=16)
axs[idx2, i].set_xlabel(f'5x5x3 - {titles1[i][0]} only', fontsize=14)
if i < 3:
stacked = [zeros] * 3
stacked[i] = m
axs[idx2, i].imshow(np.stack(stacked, axis=2))
else:
axs[idx2, i].imshow(rgb)

for r in [1, 2]:
if r in rows:
idx = idx1 if r == 1 else idx2
axs[idx, i].set_xticks([])
axs[idx, i].set_yticks([])
for k, v in axs[idx, i].spines.items():
v.set_color('black')
v.set_linewidth(.8)

if 1 in rows:
axs[idx1, 3].set_xlabel('5x5 = 0.21R + 0.72G + 0.07B')
if 2 in rows:
axs[idx2, 3].set_xlabel('5x5x3 = (R, G, B) stacked')
fig.tight_layout()
return fig``````
``````image_gray = .2126*image_r + .7152*image_g + .0722*image_b
image_rgb = np.stack([image_r, image_g, image_b], axis=2)
fig = image_channels(image_r, image_g, image_b, image_rgb, image_gray, rows=(0, 1))``````

``fig = image_channels(image_r, image_g, image_b, image_rgb, image_gray, rows=(0, 2))``

## Note on Torchvision

`torchvision` has many existing `Datasets`, `Models`, and `Transformations`.

### Conversion between ndarray, PIL, and tensor

``images.shape``
``(300, 1, 5, 5)``
``````example_chw = images[3]
example_chw``````
``````array([[[  0,   0, 255,   0,   0],
[  0, 255,   0,   0,   0],
[255,   0,   0,   0,   0],
[  0,   0,   0,   0,   0],
[  0,   0,   0,   0,   0]]], dtype=uint8)``````
``example_chw.shape``
``(1, 5, 5)``
``````example_hwc = np.transpose(example_chw, (1,2,0))
example_hwc.shape``````
``(5, 5, 1)``
``show_image(example_hwc, 'gray')``

`ToTensor`: Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8

`torch.as_tensor` can work on N-dimensional arrays (ToTensor works only on 2D/3D images), but doesn’t apply scalling and, just fyi, it shares data i.e. doesn’t make a copy.

``````tensorizer = ToTensor()
example_tensor = tensorizer(example_hwc)
example_tensor.shape``````
``torch.Size([1, 5, 5])``
``example_tensor``
``````tensor([[[0., 0., 1., 0., 0.],
[0., 1., 0., 0., 0.],
[1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]]])``````
``type(example_tensor)``
``torch.Tensor``
``````example_pil = ToPILImage()(example_tensor)  # similar to np.transpose(example_tensor, (1,2,0))
print(type(example_pil))``````
``<class 'PIL.Image.Image'>``
``show_image(example_pil, 'gray')``

To convert between Tensor and Numpy:

``````example_np = example_tensor.detach().cpu().numpy()
print(type(example_np))``````
``<class 'numpy.ndarray'>``

To convert between numpy and PIL:

``````example_np = np.array(example_pil)
print(type(example_np))``````
``<class 'numpy.ndarray'>``
``````example_pil_back = Image.fromarray(example_np)
print(type(example_pil_back))``````
``<class 'PIL.Image.Image'>``

more options for different PIL modes: (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1).

One has to be careful: - PIL gray images (L mode) need to be squeezed from 3D to 2D via np.squeeze() - RGB mode images should be floats between [0,1] - L mode (grayscale) images should be np.uint8 from [0,255] - 1 mode (bool) images are True or False.

``````# PIL_image = Image.fromarray(np.uint8(example_np)).convert('RGB')
# PIL_image = Image.fromarray(example_np.astype('uint8'), 'RGB')``````
``show_image(example_pil_back, 'gray')``

### Transformations

There are many transformations that can be done on tensors and/or PIL images:

``````flipper = RandomHorizontalFlip(p=1.0)
show_image(flipper(example_pil), 'gray')``````

``````print(example_tensor)
normalizer = Normalize(mean=.5, std=.5)
normalized_tensor = normalizer(example_tensor)
print(normalized_tensor)``````
``````tensor([[[0., 0., 1., 0., 0.],
[0., 1., 0., 0., 0.],
[1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]]])
tensor([[[-1., -1.,  1., -1., -1.],
[-1.,  1., -1., -1., -1.],
[ 1., -1., -1., -1., -1.],
[-1., -1., -1., -1., -1.],
[-1., -1., -1., -1., -1.]]])``````

### Composing transforms

``````composer = Compose([RandomHorizontalFlip(p=1.0),
Normalize(mean=(.5,), std=(.5,))])
composer(example_tensor)``````
``````tensor([[[-1., -1.,  1., -1., -1.],
[-1., -1., -1.,  1., -1.],
[-1., -1., -1., -1.,  1.],
[-1., -1., -1., -1., -1.],
[-1., -1., -1., -1., -1.]]])``````
``torch.as_tensor(example_np/255).float()``
``````tensor([[0., 0., 1., 0., 0.],
[0., 1., 0., 0., 0.],
[1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])``````

If we want to convert the whole numpy array to tensor we can use:

``````example_tensor = torch.as_tensor(example_np / 255).float()
example_tensor``````
``````tensor([[0., 0., 1., 0., 0.],
[0., 1., 0., 0., 0.],
[1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])``````

PyTorch default floating point `dtype` is `torch.float32`, one can change this if needed via `torch.set_default_dtype(torch.float64)`.

## Samplers

One can also define a `Sampler` (and it’s subclasses: `SequentialSampler`, `RandomSampler`, `SubsetRandomSampler`, `WeightedRandomSampler`, `BatchSampler`, and `DistributedSampler`), use `WeightedRandomSampler` for example in case data is unbalanced.

``````composer = Compose([RandomHorizontalFlip(p=0.5),
Normalize(mean=.5, std=.5)])

dataset = TransformedTensorDataset(x_tensor, y_tensor, transform=composer)

train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)``````
``````# Builds a loader of each set

Note that we need `val_sampler` just because we are passing the full dataset.

Also note that when using `Sampler`s, one can’t use `shuffle=True` in `DataLoaders`.

``len(iter(train_loader)), len(iter(val_loader))``
``(15, 4)``

## Some Model methods

To see the layers:

``sbs_deeper.model``
``````Sequential(
(flatten): Flatten(start_dim=1, end_dim=-1)
(linear1): Linear(in_features=25, out_features=10, bias=True)
(relu): ReLU()
(linear2): Linear(in_features=10, out_features=1, bias=True)
(sigmoid): Sigmoid()
)``````
``list(sbs_deeper.model.named_modules())``
``````[('',
Sequential(
(flatten): Flatten(start_dim=1, end_dim=-1)
(linear1): Linear(in_features=25, out_features=10, bias=True)
(relu): ReLU()
(linear2): Linear(in_features=10, out_features=1, bias=True)
(sigmoid): Sigmoid()
)),
('flatten', Flatten(start_dim=1, end_dim=-1)),
('linear1', Linear(in_features=25, out_features=10, bias=True)),
('relu', ReLU()),
('linear2', Linear(in_features=10, out_features=1, bias=True)),
('sigmoid', Sigmoid())]``````

To see particular weights in a layer:

``sbs_deeper.model.linear2.weight.detach()``
``````tensor([[ 0.1024,  0.2038,  0.2085, -0.2298,  0.1973, -0.2550,  0.2233, -0.0626,
-0.0030,  0.0018]])``````
``sbs_deeper.count_parameters()``
``271``

The 271 comes from: (25 * 10 + 10 biases) + (10 * 1 + 1 bias) = 260 + 11 = 271

to reset parameters I implemented this way but this doesn’t reset all parameters (for example in `nn.PReLU`) so use caution!

``sbs_deeper.reset_parameters()``