Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions models/entail1509.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""
An attention-based entailment model similar to the one described in
Rocktaschel et al., "Reasoning about Entailment with Neural Attention".
It approaches 2015 state-of-the-art results on the anssel-wang task (with
token flags).

The implementation is inspired by https://github.com/shyamupa/snli-entailment/blob/master/amodel.py


"""

from keras.layers.core import Layer
from keras.layers import GRU, Dropout, Lambda, Dense, RepeatVector, TimeDistributedDense, Activation, Reshape, Permute, Flatten
from keras.regularizers import l2
from keras import backend as K
import pysts.kerasts.blocks as B

def config(c):
    """Populate ``c`` in place with this model's default hyperparameters.

    Keys written:
      dropout         -- rate given to the Dropout layer created in rnn_input
      dropoutfix_inp  -- passed to the GRUs as ``dropout_W``
      dropoutfix_rec  -- passed to the GRUs as ``dropout_U``
      l2reg           -- L2 weight-regularisation strength
      rnnact, rnninit -- GRU activation and initialisation names
      sdim            -- multiplier applied to N for the Dropout input shape
      ptscorer        -- scorer callable (presumably consumed by the task
                         code; verify against the caller)
    """
    # Written as a float literal on purpose: under Python 2 without
    # ``from __future__ import division`` the expression 4/5 evaluates to
    # integer 0, which would silently switch dropout off.
    c['dropout'] = 0.8
    c['dropoutfix_inp'] = 0
    c['dropoutfix_rec'] = 0
    c['l2reg'] = 1e-4
    c['rnnact'] = 'tanh'
    c['rnninit'] = 'glorot_uniform'
    c['sdim'] = 2
    c['ptscorer'] = B.to_n_simple_ptscorer



def get_H_n(X):
    """Return the slice of ``X`` at the last index of its second axis.

    ``X`` is indexed with three subscripts, so it must be (at least)
    three-dimensional; the second axis is dropped from the result.
    """
    return X[:, -1, :]


def generate_get_Y(size):
    """Build a one-argument function that applies ``get_Y`` with a fixed size.

    The returned callable takes ``X`` and returns ``get_Y(X, size)``.
    """
    def _take_leading(X):
        # ``size`` is captured from the enclosing call.
        return get_Y(X, size)

    return _take_leading

def get_Y(X, size):
    """Return the first ``size`` entries of ``X`` along its second axis.

    The first and third axes are kept whole.
    """
    leading = slice(None, size)
    return X[:, leading, :]

def get_H_0(X):
    """Return the slice of ``X`` at index 0 of its second axis.

    Counterpart of ``get_H_n`` for the first position.  The original
    definition took no arguments yet read ``X``, so any call raised
    NameError; ``X`` is now an explicit parameter.
    """
    return X[:, 0, :]

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused? Anything else unused?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused functions should be gone now.



class SplitSequence(Layer):
    """Layer that keeps only the first ``split_ind`` entries of axis 1.

    ``split_ind`` is the number of leading positions along the second axis
    to keep; remaining keyword arguments are forwarded to ``Layer``.
    """

    def __init__(self, split_ind, **kwargs):
        # Set before calling the base constructor, as in the original code.
        self.split_ind = split_ind
        super(SplitSequence, self).__init__(**kwargs)

    def call(self, x, mask=None):
        """Return ``x`` truncated to ``split_ind`` entries on axis 1."""
        return x[:, :self.split_ind, :]

    def get_output_shape_for(self, input_shape):
        """Return ``input_shape`` with its second entry set to ``split_ind``.

        A new tuple is built rather than assigning into ``input_shape``:
        item assignment raises TypeError when the shape is a tuple and
        would otherwise modify the caller's shape object.
        """
        shape = list(input_shape)
        shape[1] = self.split_ind
        return tuple(shape)


def get_R(X):
    """Return the batched dot product of the two values held in ``X``.

    ``X`` must be a mapping with exactly two values; they are passed, in
    ``X.values()`` order, to ``K.T.batched_dot``.
    """
    # NOTE(review): relies on X.values() yielding Y before alpha -- presumably
    # X is an ordered mapping built from the node's input list; confirm.
    first, second = X.values()
    return K.T.batched_dot(first, second)

def rnn_input(model, N, spad, input, c):
    """Add forward/backward GRU nodes over ``input`` and a dropout node.

    Nodes added to ``model``:
      'forward'  -- GRU over ``input``, returning sequences
      'backward' -- same configuration with ``go_backwards=True``
      'rnndrop'  -- Dropout applied to the concatenation of the two

    Parameters:
      model -- graph object exposing ``add_node``
      N     -- GRU input and output dimension
      spad  -- padded length of one sentence; the GRUs are declared with
               ``input_length=2*spad``
      input -- name of the node feeding both GRUs
      c     -- config dict; reads 'rnninit', 'rnnact', 'dropoutfix_inp',
               'dropoutfix_rec', 'dropout' and 'sdim'

    Returns a two-element list holding the name 'rnndrop' twice.
    """
    # Review thread on this function: "Can we use rnn_input from kerasts?"
    # -- author: "Yes, after some changes I made."
    seq_len = 2 * spad
    gru_args = dict(input_dim=N, output_dim=N, input_length=seq_len,
                    init=c['rnninit'], activation=c['rnnact'],
                    return_sequences=True,
                    dropout_W=c['dropoutfix_inp'],
                    dropout_U=c['dropoutfix_rec'])

    model.add_node(name='forward', input=input, layer=GRU(**gru_args))
    # NOTE(review): with go_backwards=True the returned sequence may be in
    # reversed time order relative to 'forward' -- confirm the alignment is
    # intended before concatenating step by step.
    model.add_node(name='backward', input=input,
                   layer=GRU(go_backwards=True, **gru_args))

    # The original also assigned an unused local list here; it is dropped.
    model.add_node(name='rnndrop', inputs=['forward', 'backward'],
                   merge_mode='concat',
                   layer=Dropout(c['dropout'],
                                 input_shape=(seq_len, int(N * c['sdim']))))
    return ['rnndrop'] * 2


def entailment_embedding(model, inputs, N=608, spad=60, l2reg=1e-4):
    """Add the attention nodes to ``model`` and return the output node names.

    Parameters:
      model  -- graph object exposing ``add_node``
      inputs -- two node names; ``inputs[0]`` feeds the 'Y' node and
                ``inputs[1]`` feeds the 'h_n' node
      N      -- width of the Dense / TimeDistributedDense projections
      spad   -- number of leading timesteps kept for 'Y' and attended over
      l2reg  -- L2 regularisation strength for the projection weights

    Returns ``['Wr', 'Wh']``: the projected attention-weighted summary and
    the projected 'h_n' vector.
    """
    # Review thread: "Can we rename get_H_n?" -- author: "Ok."
    model.add_node(Lambda(get_H_n, output_shape=(N,)), name='h_n',
                   input=inputs[1])
    # Review thread: "Maybe FirstSequence is better name?" -- author:
    # "I changed it to FirstSentence."  The name visible here is kept.
    model.add_node(SplitSequence(spad), name='Y', input=inputs[0])

    model.add_node(Dense(N, W_regularizer=l2(l2reg)), name='Wh_n',
                   input='h_n')
    model.add_node(RepeatVector(spad), name='Wh_n_cross_e', input='Wh_n')
    model.add_node(TimeDistributedDense(N, W_regularizer=l2(l2reg)),
                   name='WY', input='Y')
    model.add_node(Activation('tanh'), name='M',
                   inputs=['Wh_n_cross_e', 'WY'], merge_mode='sum')

    # Review thread: "softmax on 1 dim?" -- a softmax over a feature axis of
    # size 1 yields 1.0 for every timestep, so the weights carried no
    # information.  Score each timestep linearly, then normalise across the
    # ``spad`` timesteps and restore the trailing unit axis that get_R's
    # batched dot product is given.
    model.add_node(TimeDistributedDense(1, activation='linear'),
                   name='alpha_score', input='M')
    model.add_node(Flatten(input_shape=(spad, 1)), name='alpha_flat',
                   input='alpha_score')
    model.add_node(Activation('softmax'), name='alpha_norm',
                   input='alpha_flat')
    model.add_node(Reshape((spad, 1)), name='alpha', input='alpha_norm')

    model.add_node(Permute((2, 1)), name="Yp", input='Y')
    model.add_node(Lambda(get_R, output_shape=(N, 1)), name='_r',
                   inputs=['Yp', 'alpha'], merge_mode='join')
    model.add_node(Flatten(input_shape=(N, 1)), name='r', input='_r')
    model.add_node(Dense(N, W_regularizer=l2(l2reg)), name='Wr', input='r')
    model.add_node(Dense(N, W_regularizer=l2(l2reg)), name='Wh', input='h_n')
    return ['Wr', 'Wh']


def prep_model(model, N, s0pad, s1pad, c):
    """Assemble the model on ``model`` and return its output node name.

    Steps: concatenate nodes 'e0' and 'e1' into 'embmerge', run
    ``rnn_input`` over it, build the attention nodes with
    ``entailment_embedding`` (width ``2*N``), then add a 3-way
    ``B.to_n_ptscorer`` with prefix "entail_out".

    Reads 'l2reg' and 'Ddim' from ``c`` (plus whatever ``rnn_input``
    reads).  ``s1pad`` is accepted but not used.  Returns a one-element
    list holding the scorer's node name.
    """
    # NOTE(review): only s0pad is forwarded as the sentence length --
    # presumably s0pad == s1pad is expected; confirm with the caller.
    model.add_node(layer=Activation('linear'), name="embmerge",
                   inputs=['e0', 'e1'], merge_mode='concat')
    rnn_nodes = rnn_input(model, N, s0pad, 'embmerge', c)
    attended = entailment_embedding(model, rnn_nodes, 2 * N, s0pad,
                                    c['l2reg'])

    # Review thread: "Maybe we should just return outputs and leave the
    # scoring issue task-specific." / "Also careful that we don't do two
    # softmax in succession." -- author: "I removed the scorer and two
    # softmax in succession."  The scorer is still present in this revision.
    scored = B.to_n_ptscorer(model, attended, c['Ddim'], N, c['l2reg'],
                             pfx="entail_out", output_dim=3)
    return [scored]
23 changes: 23 additions & 0 deletions pysts/kerasts/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,29 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0,
model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)))

def prep_to_n_kwargs(inputs, extra_inp):
    """Build the input keyword arguments for an ``add_node`` call.

    ``inputs`` and ``extra_inp`` are sequences of node names.  When they
    hold exactly one name in total, the result is ``{'input': name}``.
    Otherwise it is ``{'inputs': [...], 'merge_mode': 'sum'}`` with the
    names of ``inputs`` followed by those of ``extra_inp``.

    The original picked ``inputs[0]`` in both single-name branches, which
    raised IndexError when the only name came from ``extra_inp``.
    """
    names = list(inputs) + list(extra_inp)
    if len(names) == 1:
        return {'input': names[0]}
    return {'inputs': names, 'merge_mode': 'sum'}

def to_n_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
    """Add a tanh node followed by a softmax Dense scorer; return its name.

    The input nodes (``inputs`` plus ``extra_inp``) are wired through
    ``prep_to_n_kwargs`` into a tanh Activation named ``pfx + 'to_n_sum'``,
    which feeds a Dense layer of width ``output_dim`` with softmax
    activation and L2 regularisation ``l2reg``, named ``pfx + 'to_n_out'``.
    ``Ddim`` and ``N`` are accepted but not used.
    """
    sum_node = pfx + 'to_n_sum'
    out_node = pfx + "to_n_out"

    wiring = prep_to_n_kwargs(inputs, extra_inp)
    model.add_node(Activation('tanh'), name=sum_node, **wiring)

    scorer = Dense(output_dim=output_dim, activation='softmax',
                   W_regularizer=l2(l2reg))
    model.add_node(scorer, name=out_node, input=sum_node)
    return out_node

def to_n_simple_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
    """Add a single linear Dense scorer node and return its name.

    The input nodes (``inputs`` plus ``extra_inp``) are wired through
    ``prep_to_n_kwargs`` straight into a Dense layer of width
    ``output_dim`` with linear activation and L2 regularisation ``l2reg``,
    named ``pfx + '_to_n_out'``.  ``Ddim`` and ``N`` are accepted but not
    used.
    """
    out_node = pfx + '_to_n_out'
    wiring = prep_to_n_kwargs(inputs, extra_inp)
    scorer = Dense(output_dim=output_dim, activation='linear',
                   W_regularizer=l2(l2reg))
    model.add_node(scorer, name=out_node, **wiring)
    return out_node

def add_multi_node(model, name, inputs, outputs, layer_class,
layer_args, siamese=True, **kwargs):
Expand Down
6 changes: 3 additions & 3 deletions tasks/rte.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ def prep_model(self, module_prep_model):
kwargs['Dinit'] = self.c['Dinit']

model.add_node(name='scoreS0', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out0", **kwargs),
layer=Activation('sigmoid'))
layer=Activation('linear'))
model.add_node(name='scoreS1', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out1", **kwargs),
layer=Activation('sigmoid'))
layer=Activation('linear'))
model.add_node(name='scoreS2', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out2", **kwargs),
layer=Activation('sigmoid'))
layer=Activation('linear'))

model.add_node(name='scoreV', inputs=['scoreS0', 'scoreS1', 'scoreS2'], merge_mode='concat', layer=Activation('softmax'))
model.add_output(name='score', input='scoreV')
Expand Down