Hi all,
I have built a simple RNN with Theano to experiment with. For a small test, I would like to add more taps to the hidden layer's output.
However, this gives me the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 585, in runfile
execfile(filename, namespace)
File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 531, in <module>
test_real()
File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 509, in test_real
model.fit(seq, targets, validation_frequency=1000)
File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 418, in fit
gparam = T.grad(cost, param)
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 542, in grad
grad_dict, wrt, cost_name)
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1272, in _populate_grad_dict
rval = [access_grad_cache(elem) for elem in wrt]
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1232, in access_grad_cache
term = access_term_cache(node)[idx]
File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1082, in access_term_cache
input_grads = node.op.grad(inputs, new_output_grads)
File "C:\Anaconda\lib\site-packages\theano\scan_module\scan_op.py", line 1729, in grad
outer_inp_mitmot.append(dC_douts[idx + offset][::-1])
TypeError: 'Variable' object has no attribute '__getitem__'
The full code is included below.
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 07 13:28:51 2014
@author: vaneetke
"""
import numpy as np
import theano
import theano.tensor as T
dtype = 'float32'


def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    """Compute a single time step of the recurrent network.

    Intended as the step function of ``theano.scan``; the parameters are
    grouped as follows:

    sequences:      x_t           -- input at the current step
    prior results:  h_tm2, h_tm1  -- hidden state two steps / one step back
    non-sequences:  W_ih, W_hh, b_h, W_ho, b_o

    Both previous hidden states are multiplied by the same recurrent
    matrix ``W_hh``.

    Returns ``[h_t, y_t]``: the new hidden state and this step's output.
    """
    input_term = theano.dot(x_t, W_ih)
    recurrent_tm1 = theano.dot(h_tm1, W_hh)
    recurrent_tm2 = theano.dot(h_tm2, W_hh)
    pre_activation = input_term + recurrent_tm1 + recurrent_tm2 + b_h

    h_t = T.tanh(pre_activation)
    y_t = theano.dot(h_t, W_ho) + b_o
    return [h_t, y_t]
# symbolic input sequence; the first dimension is time
x = T.matrix(dtype=dtype)

# layer sizes
n_in = 1
n_hid = 3
n_out = 1


def _small_uniform(shape):
    """Return an array of `shape` drawn uniformly from [-0.01, 0.01), cast to `dtype`."""
    return np.random.uniform(low=-.01, high=.01, size=shape).astype(dtype)


# initial values (drawn in this order so the random stream is consumed
# exactly as before)
W_hh_values = _small_uniform((n_hid, n_hid))
h0_value = _small_uniform((2, n_hid))  # two rows: initial states for taps -2 and -1
b_h_value = _small_uniform(n_hid)
W_ih_values = _small_uniform((n_in, n_hid))
W_ho_values = _small_uniform((n_hid, n_out))
b_o_value = _small_uniform(n_out)

# parameters of the rnn, wrapped as shared variables
b_h, h0, W_ih, W_hh, W_ho, b_o = map(
    theano.shared,
    (b_h_value, h0_value, W_ih_values, W_hh_values, W_ho_values, b_o_value),
)
params = [W_ih, W_hh, b_h, W_ho, b_o, h0]

# hidden states and outputs over the entire sequence;
# outputs_info lines up with the [h_t, y_t] list returned by one_step
(h_vals, y_vals), _ = theano.scan(
    fn=one_step,
    sequences={'input': x},
    outputs_info=[{'initial': h0, 'taps': [-2, -1]}, None],
    non_sequences=[W_ih, W_hh, b_h, W_ho, b_o],
)
# target values, compared element-wise with y_vals
t = T.matrix(dtype=dtype)

# learning rate, kept in a shared variable.
# np.asarray(..., dtype=...) yields the same 0-d array as np.cast[dtype](...)
# and still works on NumPy 2.0, where np.cast was removed.
lr = np.asarray(0.1, dtype=dtype)
learning_rate = theano.shared(lr)

# cost: half the mean squared error plus half the squared difference
# between the standard deviations of predictions and targets
cost = (0.5 * ((y_vals - t) ** 2.0).mean()) + (0.5 * (y_vals.std() - t.std()) ** 2.0)

# Workaround for "TypeError: 'Variable' object has no attribute
# '__getitem__'" raised from scan_op.grad: the cost above only uses y_vals,
# so the gradient with respect to the multi-tap output h_vals is
# disconnected. Older Theano releases appear to slice that gradient without
# checking for the disconnected case when an output has more than one tap
# (single-tap outputs are handled, which is why taps=[-1] works).
# Adding a zero-weighted term keeps h_vals connected to the cost without
# changing its value. NOTE(review): should be unnecessary on newer Theano
# versions -- confirm and drop once upgraded.
cost = cost + 0.0 * h_vals.sum()

# gradients of the cost with respect to every parameter, in one call
gparams = T.grad(cost, params)

# plain gradient-descent update for each parameter
updates = [(param, param - gparam * learning_rate)
           for param, gparam in zip(params, gparams)]

# one training step: returns the cost and applies the updates
learn_rnn_fn = theano.function(inputs=[x, t],
                               outputs=cost,
                               updates=updates)

# forward pass only: returns the predicted outputs for a sequence
eval_rnn_fn = theano.function(inputs=[x],
                              outputs=y_vals)
# artificial data: a sampled sine wave; the target at each step is the
# sample that follows it
x_ = np.arange(0., 100., 0.21).astype(dtype).reshape(-1, 1)
s_ = np.sin(x_)
t_ = np.roll(s_, -1)[:-1]  # shift left by one; drop the wrapped-around last entry
s_ = s_[:-1]               # trim the input to the same length as the targets

for i in range(100):
    # Use a separate name for the numeric result so the symbolic `cost`
    # expression is not overwritten by the training loop.
    train_cost = learn_rnn_fn(s_, t_)
    # Single pre-formatted string: prints the same text under Python 2 and 3.
    print("%s  - cost:  %s" % (i, train_cost.mean()))

pred = eval_rnn_fn(s_)

# plot the targets (solid) against the predictions (dashed)
from matplotlib import pyplot as plt
plt.plot(t_)
plt.plot(pred, '--')
plt.show()
This works fine without the second tap (i.e., with only taps=[-1] and only h_tm1 in the one_step() function).
Am I doing something wrong here, or could this be a bug in Theano?