from d2l import torch as d2l
import torch
from torch import nn
import numpy as np

AutoRec (Sedhain et al., 2015) recasts collaborative filtering as autoencoder reconstruction.
The input is a partially observed rating vector for one item (1 column of the rating matrix, length = #users, with zeros for unobserved entries). The autoencoder reconstructs it. Loss is computed only at the observed positions — unobserved entries are ignored.
$$\mathcal{L} = \sum_{(u,i) \in \Omega} \bigl(r_{ui} - h(\mathbf{r}_{*i}; \theta)_u\bigr)^2 + \lambda \|\theta\|^2$$
Adds the nonlinearity that pure MF lacks. Two variants: user-based (input = ratings the user gave) and item-based (input = ratings the item received). The deck implements item-based.
The setup cell selects the backend-specific d2l package and tensor library. The model itself is the same idea in both tabs: reconstruct an item rating vector with a masked loss.
Encoder: linear -> activation -> bottleneck. Decoder: linear -> ratings. During training, the forward pass masks unobserved entries so gradients come only from known ratings:
class AutoRec(nn.Module):
    """Item-based AutoRec: a one-hidden-layer autoencoder over rating vectors.

    The network maps a rating vector of length ``num_users`` through
    ``Linear -> sigmoid -> dropout`` into a ``num_hidden``-dimensional code,
    then through a second ``Linear`` layer back to ``num_users`` outputs.

    Args:
        num_hidden: Width of the hidden (bottleneck) layer.
        num_users: Length of each input/output rating vector.
        dropout: Dropout probability applied to the hidden activations.
    """

    def __init__(self, num_hidden, num_users, dropout=0.05):
        super().__init__()
        self.encoder = nn.Linear(num_users, num_hidden)
        self.decoder = nn.Linear(num_hidden, num_users)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input):
        """Reconstruct a batch of rating vectors.

        Args:
            input: Tensor whose last dimension is ``num_users``. Entries equal
                to zero are treated as unobserved ratings.

        Returns:
            In training mode, the reconstruction multiplied element-wise by
            ``torch.sign(input)``, so positions where ``input`` is zero output
            zero and contribute no gradient. In eval mode, the full
            (unmasked) reconstruction.
        """
        hidden = self.dropout(torch.sigmoid(self.encoder(input)))
        pred = self.decoder(hidden)
        if self.training:
            # Mask the gradient during training: only observed entries count.
            return pred * torch.sign(input)
        return pred


# RMSE only over observed positions (mask out the zeros):
def evaluator(network, inter_matrix, test_data, devices):
    """Compute the RMSE of the network's reconstructions on held-out ratings.

    Args:
        network: Model to evaluate. It is switched to eval mode and left in
            that mode on return.
        inter_matrix: Iterable of tensor batches fed to ``network``; the
            outputs are concatenated along axis 0 in iteration order.
        test_data: NumPy array of held-out ratings, aligned with the
            concatenated reconstructions. Zero entries are treated as
            unobserved and excluded from the error.
        devices: Sequence of devices; only ``devices[0]`` is used.

    Returns:
        The RMSE over the observed entries of ``test_data`` as a Python
        float, or NaN when ``test_data`` has no observed entries.
    """
    network.eval()
    scores = []
    with torch.no_grad():
        for values in inter_matrix:
            values = values.to(devices[0])
            scores.append(network(values).cpu().numpy())
    recons = np.concatenate(scores, axis=0)

    # Calculate the test RMSE; the sign of the ratings serves as the mask of
    # observed positions.
    mask = np.sign(test_data)
    squared_error = np.sum(np.square(test_data - mask * recons))
    num_observed = np.sum(mask)
    if num_observed == 0:
        # Nothing to score: return NaN explicitly instead of dividing 0 by 0.
        return float('nan')
    rmse = np.sqrt(squared_error / num_observed)
    return float(rmse)


# Standard SGD; the masked loss is the trick that turns autoencoder loss into
# a recommender:
devices = d2l.try_all_gpus()

# Load the MovieLens 100K dataset
df, num_users, num_items = d2l.read_data_ml100k()
train_data, test_data = d2l.split_data_ml100k(df, num_users, num_items)
_, _, _, train_inter_mat = d2l.load_data_ml100k(train_data, num_users,
                                                num_items)
_, _, _, test_inter_mat = d2l.load_data_ml100k(test_data, num_users,
                                               num_items)
train_inter_mat_t = torch.tensor(train_inter_mat, dtype=torch.float32)
test_inter_mat_np = np.array(test_inter_mat)
train_iter = torch.utils.data.DataLoader(
    train_inter_mat_t, shuffle=True, drop_last=True, batch_size=256,
    num_workers=d2l.get_dataloader_workers())
# NOTE: test_iter deliberately iterates the *training* matrix, unshuffled.
# At evaluation time the network reconstructs from the known training
# ratings, and `evaluator` scores those reconstructions against
# `test_inter_mat_np`.
test_iter = torch.utils.data.DataLoader(
    train_inter_mat_t, shuffle=False, batch_size=1024,
    num_workers=d2l.get_dataloader_workers())

# Model initialization, training, and evaluation
net = AutoRec(500, num_users)
nn.init.normal_(net.encoder.weight, std=0.01)
nn.init.normal_(net.decoder.weight, std=0.01)
net = net.to(devices[0])
lr, num_epochs, wd = 0.002, 25, 1e-5
# Summed (not averaged) squared error; it is divided by the number of rows
# seen in the epoch below.
loss = nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=wd)
timer = d2l.Timer()
animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 2],
                        legend=['train loss', 'test RMSE'])
for epoch in range(num_epochs):
    # `evaluator` leaves the network in eval mode, so re-enable training
    # mode (dropout + masked output) at the start of every epoch.
    net.train()
    total_loss, n = 0., 0
    for values in train_iter:
        timer.start()
        values = values.to(devices[0])
        optimizer.zero_grad()
        preds = net(values)
        # In training mode `preds` is already masked to observed entries;
        # the target is multiplied by the same sign mask.
        l = loss(preds, values * torch.sign(values))
        l.backward()
        optimizer.step()
        total_loss += l.item()
        n += values.shape[0]
        timer.stop()
    test_rmse = evaluator(net, test_iter, test_inter_mat_np, devices)
    train_l = total_loss / n
    animator.add(epoch + 1, (train_l, test_rmse))
print(f'train loss {total_loss / n:.3f}, test RMSE {test_rmse:.3f}')
# Recorded output: train loss 39.877, test RMSE 0.906
Watch the plot for two signals: training loss should fall, and test RMSE should stabilize rather than diverge. Overfitting shows up when reconstruction keeps improving but held-out RMSE worsens.