Hi all,

I have made a simple RNN with Theano to test some things out. For a small test, I would like to add more taps to the hidden layer's output.

However, this gives me the following error:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 585, in runfile
    execfile(filename, namespace)
  File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 531, in <module>
    test_real()
  File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 509, in test_real
    model.fit(seq, targets, validation_frequency=1000)
  File "C:/Dev/Spyder/Theano_Experiments/RNN/simple_rnn.py", line 418, in fit
    gparam = T.grad(cost, param)
  File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 542, in grad
    grad_dict, wrt, cost_name)
  File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1272, in _populate_grad_dict
    rval = [access_grad_cache(elem) for elem in wrt]
  File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1232, in access_grad_cache
    term = access_term_cache(node)[idx]
  File "C:\Anaconda\lib\site-packages\theano\gradient.py", line 1082, in access_term_cache
    input_grads = node.op.grad(inputs, new_output_grads)
  File "C:\Anaconda\lib\site-packages\theano\scan_module\scan_op.py", line 1729, in grad
    outer_inp_mitmot.append(dC_douts[idx + offset][::-1])
TypeError: 'Variable' object has no attribute '__getitem__'

The full code is included below.

# -*- coding: utf-8 -*-
"""
Created on Tue Oct 07 13:28:51 2014

@author: vaneetke
"""
import numpy as np
import theano
import theano.tensor as T

dtype = 'float32'

# sequences: x_t
# prior results: h_tm2, h_tm1
# non-sequences: W_ih, W_hh, b_h, W_ho, b_o
def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    # both taps share W_hh here; this is just a quick test
    h_t = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh)
                 + theano.dot(h_tm2, W_hh) + b_h)
    y_t = theano.dot(h_t, W_ho) + b_o
    return [h_t, y_t]

# first dimension is time
x = T.matrix(dtype=dtype)

n_hid = 3
n_in  = 1
n_out = 1

W_hh_values = np.array(np.random.uniform(size=(n_hid, n_hid), low=-.01, high=.01), dtype=dtype)
h0_value    = np.array(np.random.uniform(size=(2, n_hid), low=-.01, high=.01), dtype=dtype)
b_h_value   = np.array(np.random.uniform(size=(n_hid,), low=-.01, high=.01), dtype=dtype)
W_ih_values = np.array(np.random.uniform(size=(n_in, n_hid), low=-.01, high=.01), dtype=dtype)
W_ho_values = np.array(np.random.uniform(size=(n_hid, n_out), low=-.01, high=.01), dtype=dtype)
b_o_value   = np.array(np.random.uniform(size=(n_out,), low=-.01, high=.01), dtype=dtype)

# parameters of the rnn
b_h  = theano.shared(b_h_value)
h0   = theano.shared(h0_value)   # one row per tap, so shape (2, n_hid)
W_ih = theano.shared(W_ih_values)
W_hh = theano.shared(W_hh_values)
W_ho = theano.shared(W_ho_values)
b_o  = theano.shared(b_o_value)

params = [W_ih, W_hh, b_h, W_ho, b_o, h0]

# hidden states and outputs of the entire sequence
[h_vals, y_vals], _ = theano.scan(fn=one_step,
                                  sequences=dict(input=x),
                                  # corresponds to the return type of one_step
                                  outputs_info=[dict(initial=h0, taps=[-2, -1]), None],
                                  non_sequences=[W_ih, W_hh, b_h, W_ho, b_o])

# target values
t = T.matrix(dtype=dtype)

# learning rate
lr = np.cast[dtype](0.1)
learning_rate = theano.shared(lr)

cost = (0.5 * ((y_vals - t) ** 2.0).mean()) + (0.5 * (y_vals.std() - t.std()) ** 2.0)

gparams = []
for param in params:
    gparam = T.grad(cost, param)   # <-- the error is raised here
    gparams.append(gparam)

updates = []
for param, gparam in zip(params, gparams):
    updates.append((param, param - gparam * learning_rate))

learn_rnn_fn = theano.function(inputs=[x, t],
                               outputs=cost,
                               updates=updates)

eval_rnn_fn = theano.function(inputs=[x],
                              outputs=y_vals)

# artificial data: learn to predict the next sample of a sine wave
x_ = np.array(np.arange(0., 100., 0.21), dtype=dtype)
x_ = x_.reshape(len(x_), 1)
s_ = np.sin(x_)
t_ = np.roll(s_, -1)[:-1]
s_ = s_[:-1]

for i in xrange(100):
    cost = learn_rnn_fn(s_, t_)
    print i, " - cost: ", cost.mean()

pred = eval_rnn_fn(s_)

from matplotlib import pyplot as plt
plt.plot(t_)
plt.plot(pred, '--')
plt.show()

This works fine without the second tap added (i.e. with taps=[-1] and only h_tm1 in the one_step() function).
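
For reference, the single-tap version that trains without problems looks roughly like this (everything else unchanged; note that h0 then has shape (n_hid,) instead of (2, n_hid)):

def one_step(x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    h_t = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
    y_t = theano.dot(h_t, W_ho) + b_o
    return [h_t, y_t]

h0 = theano.shared(np.array(np.random.uniform(size=(n_hid,), low=-.01, high=.01), dtype=dtype))

[h_vals, y_vals], _ = theano.scan(fn=one_step,
                                  sequences=dict(input=x),
                                  outputs_info=[dict(initial=h0, taps=[-1]), None],
                                  non_sequences=[W_ih, W_hh, b_h, W_ho, b_o])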

Am I doing something wrong here, or might this be a bug in Theano?
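
In the meantime, one workaround I'm considering (an untested sketch, not verified) is to avoid the second tap entirely: carry the last two hidden states in a single recurrent output of shape (2, n_hid), use only taps=[-1], and shift the history manually inside the step function:

def one_step(x_t, h_hist, W_ih, W_hh, b_h, W_ho, b_o):
    h_tm2 = h_hist[0]
    h_tm1 = h_hist[1]
    h_t = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh)
                 + theano.dot(h_tm2, W_hh) + b_h)
    y_t = theano.dot(h_t, W_ho) + b_o
    # drop the oldest state and append the new one
    return [T.stack(h_tm1, h_t), y_t]

[h_hist_vals, y_vals], _ = theano.scan(fn=one_step,
                                       sequences=dict(input=x),
                                       # h0 already has shape (2, n_hid)
                                       outputs_info=[dict(initial=h0, taps=[-1]), None],
                                       non_sequences=[W_ih, W_hh, b_h, W_ho, b_o])

As far as scan is concerned, this is a plain single-tap recurrence, so the gradient computation that fails above should not hit the multi-tap code path.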

asked Dec 15 '14 at 07:50

Kenny Vaneetvelde
