from d2l import mxnet as d2l
from mxnet import autograd, gluon, np, npx
from mxnet.gluon import nn
import mxnet as mx
npx.set_np()

AutoRec (Sedhain et al., 2015) recasts collaborative filtering as autoencoder reconstruction.
The input is a partially observed rating vector for one item (1 column of the rating matrix, length = #users, with zeros for unobserved entries). The autoencoder reconstructs it. Loss is computed only at the observed positions — unobserved entries are ignored.
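$$\mathcal{L} = \sum_{(u,i) \in \Omega} \left(r_{ui} - h(\mathbf{r}_{*i}; \theta)_u\right)^2 + \lambda \|\theta\|^2,$$
where $\Omega$ is the set of observed (user, item) ratings, $\mathbf{r}_{*i}$ is the partially observed rating vector of item $i$, and $h(\cdot; \theta)$ is the autoencoder's reconstruction.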
AutoRec adds the nonlinearity that pure matrix factorization (MF) lacks. There are two variants: user-based (the input is the ratings a user gave) and item-based (the input is the ratings an item received). The deck implements the item-based variant.
The setup cell selects the backend-specific d2l package and tensor library. The model itself is the same idea in both tabs: reconstruct an item rating vector with a masked loss.
Encoder: linear -> activation -> bottleneck. Decoder: linear -> ratings. During training, the forward pass masks unobserved entries so gradients come only from known ratings:
class AutoRec(nn.Block):
    """Autoencoder that reconstructs a rating vector.

    The encoder is a sigmoid-activated dense layer with ``num_hidden``
    units; dropout is applied to its output. The decoder is a linear dense
    layer with ``num_users`` outputs.

    Args:
        num_hidden: Number of units in the encoder layer.
        num_users: Number of units in the decoder (output) layer.
        dropout: Rate passed to ``nn.Dropout``.
    """

    def __init__(self, num_hidden, num_users, dropout=0.05):
        super(AutoRec, self).__init__()
        self.encoder = nn.Dense(
            num_hidden, activation='sigmoid', use_bias=True)
        self.decoder = nn.Dense(num_users, use_bias=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input):
        """Reconstruct ``input``; in training mode the output is masked.

        Returns the raw decoder output outside training mode, and the
        decoder output multiplied element-wise by ``np.sign(input)`` when
        ``autograd.is_training()`` is true.
        """
        code = self.encoder(input)
        reconstruction = self.decoder(self.dropout(code))
        if not autograd.is_training():
            return reconstruction
        # np.sign(input) is 0 wherever the input is 0, so those positions
        # are zeroed in the output and contribute no gradient.
        return reconstruction * np.sign(input)


# RMSE only over observed positions (mask out the zeros):
def evaluator(network, inter_matrix, test_data, devices):
    """Return the RMSE of the network's reconstructions against test_data.

    Args:
        network: Model called on every device shard of every batch.
        inter_matrix: Iterable of input batches fed to ``network``.
        test_data: Reference ratings. Only entries with a non-zero sign
            contribute to the error and to the normalizing count.
        devices: Devices handed to ``gluon.utils.split_and_load``.

    Returns:
        The masked root-mean-squared error as a Python ``float``.
    """
    shard_outputs = []
    for batch in inter_matrix:
        shards = gluon.utils.split_and_load(batch, devices,
                                            even_split=False)
        for shard in shards:
            shard_outputs.append(network(shard).asnumpy())

    # Flatten the per-shard outputs into one sequence of rows, preserving
    # iteration order, then stack them into a single array.
    rows = [row for output in shard_outputs for row in output]
    recons = np.array(rows)

    # Calculate the test RMSE over the entries where test_data is non-zero.
    observed = np.sign(test_data)
    squared_error = np.sum(np.square(test_data - observed * recons))
    rmse = np.sqrt(squared_error / np.sum(observed))
    return float(rmse)


# Standard minibatch training (the optimizer used below is Adam); the masked
# loss is the trick that turns autoencoder loss into a recommender:
devices = d2l.try_all_gpus()

# Load the MovieLens 100K dataset
df, num_users, num_items = d2l.read_data_ml100k()
train_data, test_data = d2l.split_data_ml100k(df, num_users, num_items)

# Only the last value returned by load_data_ml100k (the interaction matrix)
# is needed here.
train_inter_mat = d2l.load_data_ml100k(train_data, num_users, num_items)[-1]
test_inter_mat = d2l.load_data_ml100k(test_data, num_users, num_items)[-1]

train_iter = gluon.data.DataLoader(
    train_inter_mat,
    batch_size=256,
    shuffle=True,
    last_batch="rollover",
    num_workers=d2l.get_dataloader_workers(),
)
# NOTE(review): test_iter iterates over the *training* interaction matrix;
# the held-out ratings enter only through inter_mat=test_inter_mat below.
# Presumably intentional (reconstruct from known ratings, score on held-out
# ones) -- confirm against d2l.train_recsys_rating.
test_iter = gluon.data.DataLoader(
    np.array(train_inter_mat),
    batch_size=1024,
    shuffle=False,
    last_batch="keep",
    num_workers=d2l.get_dataloader_workers(),
)

# Model initialization, training, and evaluation
net = AutoRec(500, num_users)
net.initialize(init=mx.init.Normal(0.01), ctx=devices, force_reinit=True)

lr = 0.002
num_epochs = 25
wd = 1e-5
optimizer = 'adam'

loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(
    net.collect_params(),
    optimizer,
    {"learning_rate": lr, 'wd': wd},
)
d2l.train_recsys_rating(
    net, train_iter, test_iter, loss, trainer, num_epochs,
    devices, evaluator, inter_mat=test_inter_mat,
)

# Watch the plot for two signals: training loss should fall, and test RMSE
# should stabilize rather than diverge. Overfitting shows up when
# reconstruction keeps improving but held-out RMSE worsens.