Source code for geometric2dr.embedding_methods.cbow

"""CBOW model with negative sampling as in Mikolov et al. [5]_. 

It is used with the corpus classes in cbow_data_reader which handles 
the data reading and loading. This allows construction of full CBOW
based systems. It is one of the choices of neural language model for
recreating DGK [2]_ like systems.

"""

# Author: Paul Scherer

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

class Cbow(nn.Module):
    """PyTorch implementation of the CBOW architecture with negative sampling
    as in Mikolov et al. [5]_

    This is used in DGK models, for example, to learn embeddings of
    substructures for downstream graph kernel definitions.

    Parameters
    ----------
    num_targets : int
        The number of targets to embed. Typically the number of substructure
        patterns, but can be repurposed to be the number of graphs.
    vocab_size : int
        The size of the vocabulary; the number of unique substructure patterns.
    embedding_dimension : int
        The desired dimensionality of the embeddings.

    Returns
    -------
    self : Cbow
        A torch.nn.Module of the CBOW model.
    """

    def __init__(self, num_targets, vocab_size, embedding_dimension):
        super(Cbow, self).__init__()
        self.num_targets = num_targets
        self.embedding_dimension = embedding_dimension
        self.vocab_size = vocab_size

        self.target_embeddings = nn.Embedding(num_targets, embedding_dimension)  # D
        self.context_embeddings = nn.Embedding(vocab_size, embedding_dimension)  # W
        self.output_layer = nn.Embedding(vocab_size, embedding_dimension)  # O

        # Uniform initialization of the target embeddings in [-1/d, 1/d],
        # zero initialization of the context embeddings
        initrange = 1.0 / (self.embedding_dimension)
        init.uniform_(self.target_embeddings.weight.data, -initrange, initrange)
        init.constant_(self.context_embeddings.weight.data, 0)

        # Layers for the alternative full-softmax path kept (commented out)
        # in forward()
        self.linear1 = nn.Linear(embedding_dimension, 128)
        self.activation1 = nn.ReLU()
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation2 = nn.LogSoftmax(dim=-1)
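
    # Note on the three embedding tables above: target_embeddings (D) is the
    # table exposed by give_target_embeddings(); context_embeddings (W) embeds
    # the context indices that are summed in forward(); output_layer (O) embeds
    # the positive target and the negative samples that the summed context
    # vector is scored against in the negative sampling objective.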

    def forward(self, pos_target, pos_contexts, pos_negatives):
        """Forward pass in network

        Parameters
        ----------
        pos_target : torch.Long
            index of the target embedding
        pos_contexts : torch.Long
            indices of the context embeddings
        pos_negatives : torch.Long
            indices of the negative samples

        Returns
        -------
        torch.float
            the negative sampling loss
        """
        # Be aware pos_contexts typically contains several context indices
        # per example.

        # Alternative full-softmax path, kept for reference:
        # emb_target_graph = self.context_embeddings(pos_graph_emb)
        # mean_contexts_subgraphs = torch.mean(self.context_embeddings(pos_contexts), dim=1)
        # stack_target_contexts = torch.cat((emb_target_graph, mean_contexts_subgraphs), dim=1)
        # h = self.linear1(stack_target_contexts)
        # h = self.activation1(h)
        # h = self.linear2(h)
        # out = self.activation2(h)
        # return out

        # Negative sampling CBOW
        # Sum the context embeddings into a single vector per example
        mean_contexts = torch.sum(self.context_embeddings(pos_contexts), dim=1)
        emb_context_target = self.output_layer(pos_target)
        emb_negative_targets = self.output_layer(pos_negatives)

        # Positive term: -log sigmoid(context . target)
        objective = torch.sum(torch.mul(mean_contexts, emb_context_target), dim=1)  # mul is elementwise multiplication
        objective = torch.clamp(objective, max=10, min=-10)
        objective = -F.logsigmoid(objective)

        # Negative term: -sum_k log sigmoid(-(context . negative_k))
        neg_objective = torch.bmm(emb_negative_targets, mean_contexts.unsqueeze(2)).squeeze()
        neg_objective = torch.clamp(neg_objective, max=10, min=-10)
        neg_objective = -torch.sum(F.logsigmoid(-neg_objective), dim=1)

        return torch.mean(objective + neg_objective)

    def give_target_embeddings(self):
        """Return the target embeddings as a numpy matrix

        Returns
        -------
        numpy ndarray
            A num_targets x embedding_dimension matrix of substructure
            pattern embeddings.
        """
        embedding = self.target_embeddings.weight.cpu().data.numpy()
        return embedding
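

# Minimal usage sketch (not part of the library API). It stands in for the
# batches normally produced by the corpus classes in cbow_data_reader with
# randomly generated index tensors, so it only demonstrates tensor shapes and
# a training step; the sizes and learning rate below are arbitrary assumptions.
if __name__ == "__main__":
    import torch.optim as optim

    num_targets, vocab_size, embedding_dimension = 100, 500, 32
    batch_size, context_size, num_negatives = 8, 4, 10

    model = Cbow(num_targets, vocab_size, embedding_dimension)
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    # Random stand-ins for target, context, and negative sample indices
    pos_target = torch.randint(0, vocab_size, (batch_size,))
    pos_contexts = torch.randint(0, vocab_size, (batch_size, context_size))
    pos_negatives = torch.randint(0, vocab_size, (batch_size, num_negatives))

    # One forward/backward step on the negative sampling loss
    loss = model(pos_target, pos_contexts, pos_negatives)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Target embeddings come back as a (num_targets, embedding_dimension) array
    embeddings = model.give_target_embeddings()
    print("loss:", loss.item(), "embeddings shape:", embeddings.shape)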