import collections
from d2l import torch as d2l
import math
import torch
import torchvision
from torch import nn
import os
import pandas as pd
import shutil
A capstone deck: assemble everything from the chapter (augmentation, fine-tuning, modern CNN architectures) and take a Kaggle competition. CIFAR-10 has been done to death, but it’s the right size for a teaching example — small enough to fit in memory, big enough that augmentation and ensembling matter.
Kaggle CIFAR-10 competition page.
Tiny demo subset for the book; swap in the full dataset for the actual competition:
# Register the tiny demo archive with the d2l data hub as a
# (download URL, hex digest) pair -- the digest is presumably the archive's
# checksum; confirm against d2l.download_extract.
d2l.DATA_HUB['cifar10_tiny'] = (
    d2l.DATA_URL + 'kaggle_cifar10_tiny.zip',
    '2068874e4b9a9f0fb07ebe0ad2b29754449ccacd',
)

# Flip `demo` to False to use the full dataset downloaded for the Kaggle
# competition instead of the tiny demo subset.
demo = True
data_dir = (d2l.download_extract('cifar10_tiny') if demo
            else '../data/cifar-10/')

# Kaggle ships everything in one folder; standard torchvision-style training
# expects train/<class>/img.png. Build that layout from the labels.csv:
def read_csv_labels(fname):
    """Read `fname` to return a filename to label dictionary.

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line must hold exactly two comma-separated fields,
    ``name,label``.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        A dict mapping each name (first column) to its label (second column).
        An empty or header-only file yields an empty dict.

    Raises:
        ValueError: If a non-blank data line does not split into exactly two
            comma-separated fields.
    """
    name_to_label = {}
    with open(fname, 'r') as f:
        # Skip the file header line (column names); tolerate an empty file
        next(f, None)
        for line in f:
            line = line.rstrip()
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no record and would otherwise break the unpacking below
            if not line:
                continue
            name, label = line.split(',')
            name_to_label[name] = label
    return name_to_label
# Load the name -> label mapping and report how many examples and distinct
# classes it contains.
labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv'))
print('# training examples:', len(labels))
print('# classes:', len({*labels.values()}))
# Recorded output:
# training examples: 1000
# classes: 10
def copyfile(filename, target_dir):
    """Copy `filename` into `target_dir`, creating the directory if needed.

    Args:
        filename: Path of the file to copy.
        target_dir: Destination directory; missing parent directories are
            created, and an already existing directory is not an error.
    """
    # Ensure the destination exists before copying into it
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(src=filename, dst=target_dir)
def reorg_train_valid(data_dir, labels, valid_ratio):
    """Split the validation set out of the original training set.

    Every file in ``<data_dir>/train`` is copied to
    ``<data_dir>/train_valid_test/train_valid/<label>``. In addition, the
    first `n_valid_per_label` files of each label (in sorted filename order)
    are copied to ``train_valid_test/valid/<label>`` and all remaining files
    to ``train_valid_test/train/<label>``.

    Args:
        data_dir: Dataset root containing the ``train`` folder.
        labels: Dict mapping a file name without its extension (the part
            before the first ``.``) to its label.
        valid_ratio: Fraction of the smallest class's example count to hold
            out per label for validation.

    Returns:
        The number of validation examples held out per label (at least 1).

    Raises:
        KeyError: If a file in ``train`` has no entry in `labels`.
    """
    # The number of examples of the class that has the fewest examples in the
    # training dataset
    n = min(collections.Counter(labels.values()).values())
    # The number of examples per class for the validation set
    n_valid_per_label = max(1, math.floor(n * valid_ratio))
    # How many files of each label have been sent to the validation set
    label_count = collections.Counter()
    train_dir = os.path.join(data_dir, 'train')
    out_dir = os.path.join(data_dir, 'train_valid_test')
    # os.listdir returns entries in arbitrary order; sorting makes the
    # train/valid split reproducible across runs and machines
    for train_file in sorted(os.listdir(train_dir)):
        label = labels[train_file.split('.')[0]]
        fname = os.path.join(train_dir, train_file)
        copyfile(fname, os.path.join(out_dir, 'train_valid', label))
        if label_count[label] < n_valid_per_label:
            copyfile(fname, os.path.join(out_dir, 'valid', label))
            label_count[label] += 1
        else:
            copyfile(fname, os.path.join(out_dir, 'train', label))
    return n_valid_per_label


# Standard recipe — random crop, flip, normalize for train; just normalize
# for eval:
# Augmentation pipeline applied to training images.
transform_train = torchvision.transforms.Compose([
    # Upscale so that the image is 40 pixels in both height and width
    # (inputs assumed square -- TODO confirm)
    torchvision.transforms.Resize(size=40),
    # Take a random square crop (aspect ratio fixed at 1) covering 0.64 to 1
    # times the area of the upscaled image, then rescale that crop to
    # 32 pixels in both height and width
    torchvision.transforms.RandomResizedCrop(
        size=32, scale=(0.64, 1.0), ratio=(1.0, 1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    # Standardize each of the three channels with its own mean and std
    torchvision.transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010]),
])

# Folder-based dataset + the augmentation pipelines:
# Datasets used for fitting ('train' and 'train_valid') get the augmenting
# training transform.
train_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'train'),
    transform=transform_train)
train_valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'train_valid'),
    transform=transform_train)

# Datasets used for evaluation ('valid' and 'test') get the test transform.
valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'valid'),
    transform=transform_test)
test_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'test'),
    transform=transform_test)

# Training loaders reshuffle and drop the final incomplete batch.
train_iter = torch.utils.data.DataLoader(
    train_ds, batch_size=batch_size, shuffle=True, drop_last=True)
train_valid_iter = torch.utils.data.DataLoader(
    train_valid_ds, batch_size=batch_size, shuffle=True, drop_last=True)

# NOTE(review): drop_last=True also discards the final incomplete batch of
# validation examples -- confirm this is intended.
valid_iter = torch.utils.data.DataLoader(
    valid_ds, batch_size=batch_size, shuffle=False, drop_last=True)

# The test loader keeps file order and every example, so that predictions
# can be matched back to test ids.
test_iter = torch.utils.data.DataLoader(
    test_ds, batch_size=batch_size, shuffle=False, drop_last=False)

# No transfer learning this time — CIFAR-10 is small enough to train from
# scratch. The core unit is the same residual block from the ResNet chapter:
# two 3×3 convs plus an identity or 1×1 projection shortcut.
Four residual stages progressively downsample the image and widen channels. Global average pooling removes spatial dimensions; the final dense layer emits 10 class logits:
Across frameworks, get_net returns the same contract: input minibatches of CIFAR-10 images, output logits with shape (batch, 10), and cross-entropy as the training loss.
SGD with momentum + weight decay + LR step decay is the classic small-image vision recipe. The long helper mainly adapts that recipe to each framework, so teach the invariant loop:
Use the validation split for model selection. Training loss should decline smoothly; validation accuracy is the signal for whether augmentation and the learning-rate schedule are helping rather than just fitting the train set.
# Hyperparameters and a fresh network for the run that is scored on the
# validation split.
devices = d2l.try_all_gpus()
num_epochs = 20
lr = 0.001
wd = 5e-4
lr_period = 4
lr_decay = 0.9
net = get_net()
# Push one training minibatch through the network before touching its
# weights (presumably to materialize lazily-shaped layers -- confirm against
# get_net).
net(next(iter(train_iter))[0])


def init_weights(module):
    """Re-initialize the weight of a Linear or Conv2d module in place.

    Uses Kaiming-normal initialization with the ReLU nonlinearity. Only
    modules whose exact type is `nn.Linear` or `nn.Conv2d` are touched;
    every other module is left as is.
    """
    if type(module) not in (nn.Linear, nn.Conv2d):
        return
    nn.init.kaiming_normal_(module.weight, nonlinearity='relu')


net.apply(init_weights)
train(net, train_iter, valid_iter, num_epochs, lr, wd, devices, lr_period,
      lr_decay)
# Recorded output:
# train loss 1.520, train acc 0.421, best valid acc 0.438
# 2448.9 examples/sec on [device(type='cuda', index=0)]
Run on the test set, write a Kaggle-format CSV:
# Retrain a fresh network on the combined train + validation data (no
# validation iterator is passed), then predict the test set and write the
# submission file.
net, preds = get_net(), []
net(next(iter(train_valid_iter))[0])
# NOTE(review): the validation run above calls net.apply(init_weights) before
# training; this run does not -- confirm whether that difference is intended.
train(net, train_valid_iter, None, num_epochs, lr, wd, devices, lr_period,
      lr_decay)

# Put the network in evaluation mode so that layers such as batch
# normalization and dropout use their inference behavior, and skip autograd
# bookkeeping while predicting.
net.eval()
with torch.no_grad():
    for X, _ in test_iter:
        y_hat = net(X.to(devices[0]))
        preds.extend(y_hat.argmax(dim=1).type(torch.int32).cpu().numpy())

# Ids 1..N ordered as strings ('1', '10', '100', ...) -- presumably to match
# the filename order in which test_ds enumerates the test images; verify
# against the test folder layout.
sorted_ids = list(range(1, len(test_ds) + 1))
sorted_ids.sort(key=str)
df = pd.DataFrame({'id': sorted_ids, 'label': preds})
# Translate predicted class indices back to class names
df['label'] = df['label'].apply(lambda x: train_valid_ds.classes[x])
df.to_csv('submission.csv', index=False)
# Recorded output:
# train loss 1.361, train acc 0.477
# 2820.6 examples/sec on [device(type='cuda', index=0)]