from d2l import tensorflow as d2l
import tensorflow as tf
import keras
import numpy as np
import os
A second Kaggle capstone: ImageNet Dogs (120 fine-grained breeds). The big difference from CIFAR-10: this is a subset of ImageNet, so a pretrained ResNet already knows almost everything about these classes. Fine-tuning is the right play.
The competition is hosted on the Kaggle “Dog Breed Identification” page.
# Register the small demo archive under the key 'dog_tiny'. The tuple holds
# the download URL and a hex digest (presumably the archive checksum).
d2l.DATA_HUB['dog_tiny'] = (
    d2l.DATA_URL + 'kaggle_dog_tiny.zip',
    '0cb91d09b814ecdc07b50f31f8dcad3e81d6a86d',
)

# Switch `demo` to `False` when using the full dataset downloaded for the
# Kaggle competition; the data is then read from a local directory instead
# of being downloaded and extracted.
demo = True
data_dir = (
    d2l.download_extract('dog_tiny')
    if demo
    else os.path.join('..', 'data', 'dog-breed-identification')
)

# Same idea as CIFAR-10 — reshuffle the Kaggle layout into
# train/<class>/img.jpg for the standard ImageFolder loader:
ImageNet-style augmentation: resize to 256×256 followed by a random 224×224 crop, random horizontal flip, color jitter (brightness, contrast, saturation), and the same input preprocessing convention the pretrained backbone expects:
def transform_train_fn(image, label):
    """Augment and normalize a single training example.

    The image is cast to float32, resized to 256x256, randomly cropped to
    224x224x3, randomly mirrored left/right and jittered in brightness,
    contrast and saturation. Pixel values are clipped back to [0, 255]
    before the ResNet50 ``preprocess_input`` is applied.

    Returns:
        A ``(preprocessed_image, label)`` pair; the label is passed through
        unchanged.
    """
    augmented = tf.image.resize(tf.cast(image, tf.float32), [256, 256])
    augmented = tf.image.random_crop(augmented, size=[224, 224, 3])
    augmented = tf.image.random_flip_left_right(augmented)
    # Color jitter. Brightness is expressed on the 0-255 pixel scale.
    augmented = tf.image.random_brightness(augmented, max_delta=0.4 * 255)
    augmented = tf.image.random_contrast(augmented, lower=0.6, upper=1.4)
    augmented = tf.image.random_saturation(augmented, lower=0.6, upper=1.4)
    # The jitter can leave the valid pixel range, so clamp before normalizing.
    augmented = tf.clip_by_value(augmented, 0.0, 255.0)
    normalized = tf.keras.applications.resnet50.preprocess_input(augmented)
    return normalized, label


def transform_test_fn(image, label):
    """Deterministically preprocess a single evaluation example.

    The image is cast to float32, resized to 256x256, center-cropped to
    224x224 and passed through the ResNet50 ``preprocess_input``.

    Returns:
        A ``(preprocessed_image, label)`` pair; the label is passed through
        unchanged.
    """
    resized = tf.image.resize(tf.cast(image, tf.float32), [256, 256])
    # The input is larger than the target, so this takes the central region.
    cropped = tf.image.resize_with_crop_or_pad(resized, 224, 224)
    return tf.keras.applications.resnet50.preprocess_input(cropped), label


def _load_image_folder_tf(folder_path):
    """Build an unbatched, unshuffled dataset from a class-subfolder directory.

    Images are loaded at 256x256 with integer labels; batching is left to
    the caller (``batch_size=None``).
    """
    return keras.utils.image_dataset_from_directory(
        folder_path,
        label_mode='int',
        image_size=(256, 256),
        batch_size=None,
        shuffle=False,
    )
# Load the four splits produced by the reorganization step. Each one lives in
# its own sub-directory of <data_dir>/train_valid_test.
train_ds, train_valid_ds, valid_ds, test_ds = (
    _load_image_folder_tf(os.path.join(data_dir, 'train_valid_test', split))
    for split in ('train', 'train_valid', 'valid', 'test'))

# NOTE(review): `batch_size` is not defined in the visible part of this file;
# it must be set before this point — confirm.

# Training pipelines: augment, shuffle, and keep only full batches.
train_iter = (
    train_ds.map(transform_train_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(10000)
    .batch(batch_size, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE))
train_valid_iter = (
    train_valid_ds.map(transform_train_fn,
                       num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(10000)
    .batch(batch_size, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE))

# Evaluation pipelines: deterministic preprocessing, no shuffling. The final
# partial batch is kept so that every validation/test example is scored;
# dropping it would discard up to `batch_size - 1` validation examples and
# could leave the validation iterator empty on a small split.
valid_iter = (
    valid_ds.map(transform_test_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size, drop_remainder=False)
    .prefetch(tf.data.AUTOTUNE))
test_iter = (
    test_ds.map(transform_test_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size, drop_remainder=False)
    .prefetch(tf.data.AUTOTUNE))

# This competition is close to ImageNet, so we reuse a pretrained ResNet as a
# frozen feature extractor and train only a small 120-way breed classifier:
def get_net():
    """Build the transfer-learning model: frozen ResNet50 plus a small head.

    The ImageNet-pretrained ResNet50 keeps its top layer but without an
    activation, so its ImageNet logits serve as frozen features (this
    mirrors the PyTorch tab). A 256-unit ReLU layer followed by a 120-unit
    linear layer is stacked on top to produce the breed logits.

    Returns:
        A ``keras.Model`` mapping 224x224x3 images to 120 unnormalized
        class scores.
    """
    resnet = keras.applications.ResNet50(
        weights='imagenet',
        include_top=True,
        classifier_activation=None,
        input_shape=(224, 224, 3),
    )
    # Freeze the backbone and always call it in inference mode.
    resnet.trainable = False

    images = keras.Input(shape=(224, 224, 3))
    imagenet_logits = resnet(images, training=False)
    hidden = keras.layers.Dense(256, activation='relu')(imagenet_logits)
    breed_logits = keras.layers.Dense(120)(hidden)
    return keras.Model(images, breed_logits)


# Only the custom output network receives gradients. The validation loss is
# computed through the same frozen features, so it measures whether the
# dog-breed head is generalizing:
# `reduction='none'` yields one loss value per example, which lets
# `evaluate_loss` sum them and divide by the exact example count.
loss = keras.losses.SparseCategoricalCrossentropy(
    reduction='none', from_logits=True)


def evaluate_loss(data_iter, net):
    """Return the mean per-example loss of `net` over `data_iter`.

    Args:
        data_iter: Iterable yielding ``(features, labels)`` batches.
        net: Model called as ``net(features, training=False)`` to obtain
            logits.

    Returns:
        The sum of all per-example losses divided by the number of labels
        seen.
    """
    total_loss, num_examples = 0.0, 0
    for batch_features, batch_labels in data_iter:
        per_example = loss(batch_labels, net(batch_features, training=False))
        total_loss += float(tf.reduce_sum(per_example))
        num_examples += len(batch_labels)
    return total_loss / num_examples


# The helper is mostly framework bookkeeping. The training structure is:
That is the practical transfer-learning tradeoff: far less memory and time, while keeping most ImageNet visual knowledge.
Expect validation loss to be the useful curve here; with 120 fine-grained classes, top-line accuracy can be noisy on the tiny book subset. On the full competition data, train longer and tune the head/augmentation strength.
train loss 0.336, valid loss 1.547
106.8 examples/sec
Write one probability vector per test image. The CSV has image id plus 120 breed probabilities, so the final layer must stay aligned with the competition’s class order:
# Train a fresh model on the combined train+valid split (no validation
# iterator is passed), then predict on the test split.
# NOTE(review): `train`, `num_epochs`, `lr`, `wd`, `lr_period` and `lr_decay`
# are not defined in the visible part of this file — confirm they are set
# earlier.
net = get_net()
net = train(net, train_valid_iter, None, num_epochs, lr, wd, lr_period,
            lr_decay)

# Collect one softmax probability vector per test image.
preds = []
for data, label in test_iter:
    probabilities = tf.nn.softmax(net(data, training=False), axis=-1)
    preds.extend(probabilities.numpy())

# Column headers come from the class sub-directories of the train_valid
# split; row ids come from the file names under test/unknown.
split_root = os.path.join(data_dir, 'train_valid_test')
class_names = sorted(os.listdir(os.path.join(split_root, 'train_valid')))
ids = sorted(os.listdir(os.path.join(split_root, 'test', 'unknown')))

with open('submission.csv', 'w') as f:
    f.write(f"id,{','.join(class_names)}\n")
    for file_name, row in zip(ids, preds):
        # The id is the file name up to its first dot.
        f.write(f"{file_name.split('.')[0]},{','.join(map(str, row))}\n")

# Recorded output:
# train loss 0.341
# 112.3 examples/sec