I'm using this tutorial to build an AdaBoost.SAMME classifier for object recognition, using HoG features. My code is below; only the top part is customized for my problem, and the rest is mostly the same as in the tutorial. This is a very small test with only 17 images in total, 10 for training and 7 for testing. Once I get it up and running, I'll add many more images for proper training.

import sys
import numpy as np
from scipy import misc, ndimage
from skimage import data, io, filter, color, exposure
from skimage.viewer import ImageViewer
from skimage.feature import hog
from skimage.transform import resize
import matplotlib.pyplot as plt
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import AdaBoostClassifier
from sklearn.externals.six.moves import xrange
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import pylab as pl
from sklearn.externals.six.moves import zip

# read the list of sample image filenames
f = open("PATH_TO_LIST_OF_SAMPLES\\samples.txt", 'r')
out = f.read().splitlines()

imgs = []
tmp_hogs = []
#tmp_hogs = np.zeros((17,1728))
labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]

# compute a HoG feature vector for each image
i = 0
for file in out:
    filepath = "PATH_TO_IMAGES\\imgs\\"
    readfile = filepath + file
    curr_img = color.rgb2gray(io.imread(readfile))
    imgs.append(curr_img)
    fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
                        cells_per_block=(1, 1), visualise=True, normalise=True)
    tmp_hogs.append(fd)
    i += 1

img_hogs = np.array(tmp_hogs, dtype=float)

# first 10 samples for training, the remaining 7 for testing
n_split = 10
X_train, X_test = np.array(img_hogs[:n_split]), np.array(img_hogs[n_split:])
y_train, y_test = np.array(labels[:n_split]), np.array(labels[n_split:])

#now all the code below is straight off the example on scikit-learn's website

bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1)

bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1.5,
    algorithm="SAMME")

bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)

real_test_errors = []
discrete_test_errors = []

for real_test_predict, discrete_train_predict in zip(
        bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
    real_test_errors.append(
        1. - accuracy_score(real_test_predict, y_test))
    discrete_test_errors.append(
        1. - accuracy_score(discrete_train_predict, y_test))

n_trees = xrange(1, len(bdt_discrete) + 1)

pl.figure(figsize=(15, 5))

pl.subplot(131)
pl.plot(n_trees, discrete_test_errors, c='black', label='SAMME')
pl.plot(n_trees, real_test_errors, c='black',
        linestyle='dashed', label='SAMME.R')
pl.legend()
pl.ylim(0.18, 0.62)
pl.ylabel('Test Error')
pl.xlabel('Number of Trees')

pl.subplot(132)
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
pl.plot(n_trees, bdt_real.estimator_errors_, "r", label='SAMME.R', alpha=.5)
pl.legend()
pl.ylabel('Error')
pl.xlabel('Number of Trees')
pl.ylim((.2,
        max(bdt_real.estimator_errors_.max(),
            bdt_discrete.estimator_errors_.max()) * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

pl.subplot(133)
pl.plot(n_trees, bdt_discrete.estimator_weights_, "b", label='SAMME')
pl.legend()
pl.ylabel('Weight')
pl.xlabel('Number of Trees')
pl.ylim((0, bdt_discrete.estimator_weights_.max() * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

# prevent overlapping y-axis labels
pl.subplots_adjust(wspace=0.25)
pl.show()

But I'm getting the following error:

Traceback (most recent call last):
  File "C:\Users\app\Documents\Python Scripts\carclassify.py", line 101, in <module>
    pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\pyplot.py", line 2987, in plot
    ret = ax.plot(*args, **kwargs)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 4137, in plot
    for line in self._get_lines(*args, **kwargs):
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 317, in _grab_next_args
    for seg in self._plot_args(remaining, kwargs):
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 295, in _plot_args
    x, y = self._xy_from_xy(x, y)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 237, in _xy_from_xy
    raise ValueError("x and y must have same first dimension")
ValueError: x and y must have same first dimension

So I added these lines before the tutorial section of the code, in order to see the dimensions of the X and y arrays:

print X_train.shape 
print y_train.shape
print X_test.shape 
print y_test.shape

and the output was:

(10L, 48L)
(10L,)
(7L, 48L)
(7L,)

But I'm not sure whether the x and y in the error refer to my X and y, because surely it's normal for the training and testing datasets to have different sizes. What am I doing wrong?
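In case it's relevant, here is a quick check I could add just before the plotting section, to print the sizes of everything that goes into the line the traceback points at (just a diagnostic sketch, using the same objects and attributes the plotting code above already uses):

# lengths of the arrays involved in the failing pl.plot call
print len(bdt_discrete)                      # number of trees actually fitted (what n_trees is built from)
print bdt_discrete.estimator_errors_.shape   # y values of the plot that raises the error
print bdt_discrete.estimator_weights_.shape  # y values of the third subplot
print len(discrete_test_errors)              # staged test errors, one per fitted tree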
