siftpp.cpp

00001 
00002 /***************************************************************************
00003  *  siftpp.cpp - siftpp based classifier 
00004  *
00005  *  Created: Sat Apr 12 10:15:23 2008
00006  *  Copyright 2008 Stefan Schiffer [stefanschiffer.de]
00007  *
00008  ****************************************************************************/
00009 
00010 /*  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; either version 2 of the License, or
00013  *  (at your option) any later version. A runtime exception applies to
00014  *  this software (see LICENSE.GPL_WRE file mentioned below for details).
00015  *
00016  *  This program is distributed in the hope that it will be useful,
00017  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  *  GNU Library General Public License for more details.
00020  *
00021  *  Read the full text in the LICENSE.GPL_WRE file in the doc directory.
00022  */
00023 
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
00026 
00027 #include <classifiers/siftpp.h>
00028 
00029 //#ifdef SIFTPP_TIMETRACKER
00030 #include <utils/time/clock.h>
00031 #include <utils/time/tracker.h>
00032 //#endif
00033 
00034 #include <core/exception.h>
00035 #include <core/exceptions/software.h>
00036 #include <fvutils/color/colorspaces.h>
00037 #include <fvutils/color/conversions.h>
00038 #include <fvutils/readers/png.h>
00039 //#include <fvutils/writers/pnm.h>
00040 //#include <fvutils/writers/png.h>
00041 
00042 //using namespace fawkes; 
00043 using namespace fawkes; 
00044 
00045 namespace firevision {
00046 #if 0 /* just to make Emacs auto-indent happy */
00047 }
00048 #endif
00049 
00050 /** @class SiftppClassifier <classifiers/siftpp.h>
00051  * SIFTPP classifier.
00052  *
00053  * This class provides a classifier that uses SIFTPP to detect objects in a given
 * image by matching features. Each matched feature is reported as an 11x11 ROI;
 * if anything matched, a final ROI spanning all matched features marks the object.
00056  *
00057  * This code uses siftpp from http://vision.ucla.edu/~vedaldi/code/siftpp/siftpp.html
00058  * and is partly based on code from their package.
00059  *
00060  * @author Stefan Schiffer
00061  */
00062 
00063 /** Constructor.
00064  * @param object_file file that contains an image of the object to detect
00065  * @param samplingStep Initial sampling step
00066  * @param octaves Number of analysed octaves
00067  * @param levels Number of levels per octave
00068  * @param magnif Keypoint magnification (default = 3)
00069  * @param noorient rotation invariance (0) or upright (1)
00070  * @param unnormalized Normalization of features (default 0)
00071  */
00072 SiftppClassifier::SiftppClassifier( const char * object_file,
00073                                     int samplingStep, int octaves, int levels,
00074                                     float magnif, int noorient, int unnormalized)
00075   : Classifier("SiftppClassifier")
00076 {
00077   // params for FastHessian
00078   __samplingStep = samplingStep;
00079   __octaves = octaves;
00080   __levels = levels;
00081   // params for Descriptors
00082   __first          = -1 ;
00083   __threshold      = 0.04f / __levels / 2.0f ;
00084   __edgeThreshold  = 10.0f;
00085   __magnif         = magnif;
00086   __noorient       = noorient;
00087   __unnormalized   = unnormalized;
00088 
00089   // descriptor vector length
00090   __vlen = 128;
00091 
00092 
00093   //#ifdef SIFTPP_TIMETRACKER
00094   __tt = new TimeTracker();
00095   __loop_count = 0;
00096   __ttc_objconv = __tt->add_class("ObjectConvert");
00097   __ttc_objfeat = __tt->add_class("ObjectFeatures");
00098   __ttc_imgconv = __tt->add_class("ImageConvert");
00099   __ttc_imgfeat = __tt->add_class("ImageFeatures");
00100   __ttc_matchin = __tt->add_class("Matching");
00101   __ttc_roimerg = __tt->add_class("MergeROIs");
00102   //#endif
00103 
00104   //#ifdef SIFTPP_TIMETRACKER
00105   __tt->ping_start(__ttc_objconv);
00106   //#endif
00107   
00108   PNGReader pngr( object_file );
00109   unsigned char* buf = malloc_buffer( pngr.colorspace(), pngr.pixel_width(), pngr.pixel_height() );
00110   pngr.set_buffer( buf );
00111   pngr.read();
00112   
00113   unsigned int lwidth = pngr.pixel_width();
00114   unsigned int lheight = pngr.pixel_height();
00115   VL::pixel_t * im_pt = new VL::pixel_t [lwidth * lheight ];
00116   VL::pixel_t * start = im_pt;
00117   //VL::pixel_t* end   = start + lwidth*lheight ; 
00118   for (unsigned int h = 0; h < lheight; ++h) {
00119     for (unsigned int w = 0; w < lwidth ; ++w) {
00120       int i = (buf[h * lwidth + w] );
00121       VL::pixel_t norm = VL::pixel_t( 255 );
00122       *start++ = VL::pixel_t( i ) / norm;
00123     }
00124   }
00125   // make image
00126   __obj_img = new VL::PgmBuffer();
00127   __obj_img->width  = lwidth;
00128   __obj_img->height = lheight;
00129   __obj_img->data   = im_pt;
00130 
00131   if ( ! __obj_img ) {
00132     throw Exception("Could not load object file");
00133   }
00134 
00135   //#ifdef SIFTPP_TIMETRACKER
00136   __tt->ping_end(__ttc_objconv);
00137   //#endif
00138 
00139   // save object image for debugging
00140   //
00141 
00142   //#ifdef SIFTPP_TIMETRACKER
00143   __tt->ping_start(__ttc_objfeat);
00144   //#endif
00145 
00146   // COMPUTE OBJECT FEATURES
00147   __obj_features.clear();
00148   //__obj_features.reserve(1000);
00149   __obj_num_features = 0;
00150 
00151   __sigman = .5 ;
00152   __sigma0 = 1.6 * powf(2.0f, 1.0f / __levels) ;
00153 
00154   std::cout << "SiftppClassifier(ctor): init scalespace" << std::endl;
00155   // initialize scalespace
00156   VL::Sift sift(__obj_img->data, __obj_img->width, __obj_img->height, 
00157                 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00158   
00159   std::cout << "SiftppClassifier(ctor): detect object keypoints" << std::endl;
00160   // Run SIFTPP detector
00161   sift.detectKeypoints(__threshold, __edgeThreshold) ;
00162   // Number of keypoints
00163   __obj_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00164   std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' object-keypoints" << std::endl;
00165 
00166   // set descriptor options
00167   sift.setNormalizeDescriptor( ! __unnormalized ) ;
00168   sift.setMagnification( __magnif ) ;
00169 
00170   std::cout << "SiftppClassifier(ctor): run detector, compute ori and des ..." << std::endl;
00171   // Run detector, compute orientations and descriptors
00172   for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00173        iter != sift.keypointsEnd() ; ++iter ) {
00174 
00175     //Feature * feat = new Feature();
00176     Feature feat;
00177 
00178     //std::cout << "SiftppClassifier(ctor): saving keypoint" << std::endl;
00179     feat.key = (*iter);
00180 
00181     // detect orientations
00182     VL::float_t angles [4] ;
00183     int nangles ;
00184     if( ! __noorient ) {
00185       nangles = sift.computeKeypointOrientations(angles, *iter) ;
00186     } else {
00187       nangles = 1;
00188       angles[0] = VL::float_t(0) ;
00189     }
00190     feat.number_of_desc = nangles;
00191     feat.descs = new VL::float_t*[nangles];
00192     
00193     //std::cout << "SiftppClassifier(ctor): computing '" << nangles << "' descriptors" << std::endl;
00194     // compute descriptors
00195     for(int a = 0 ; a < nangles ; ++a) {
00196       //       out << setprecision(2) << iter->x << ' ' << setprecision(2) << iter->y << ' '
00197       //          << setprecision(2) << iter->sigma << ' ' << setprecision(3) << angles[a] ;
00198       // compute descriptor
00199       feat.descs[a] = new VL::float_t[__vlen];
00200       sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00201     } // next angle
00202     //std::cout << "SiftppClassifier(ctor): computed '" << feat.number_of_desc << "' descriptors." << std::endl;
00203 
00204     // save feature
00205     __obj_features.push_back( feat );
00206 
00207   } // next keypoint
00208   
00209   __obj_num_features = __obj_features.size();
00210   if ( ! __obj_num_features > 0 ) {
00211     throw Exception("Could not compute object features");
00212   }
00213   std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' features from object" << std::endl;
00214 
00215   //#ifdef SIFTPP_TIMETRACKER
00216   __tt->ping_end(__ttc_objfeat);
00217   //#endif
00218 
00219 }
00220 
00221 
00222 /** Destructor. */
00223 SiftppClassifier::~SiftppClassifier()
00224 {
00225   //
00226   delete __obj_img;
00227   __obj_features.clear();
00228   //
00229   //delete __image;
00230   __img_features.clear();
00231 }
00232 
00233 
00234 std::list< ROI > *
00235 SiftppClassifier::classify()
00236 {
00237   //#ifdef SIFTPP_TIMETRACKER
00238   __tt->ping_start(0);
00239   //#endif
00240 
00241   // list of ROIs to return
00242   std::list< ROI > *rv = new std::list< ROI >();
00243 
00244   // for ROI calculation
00245   int x_min = _width;
00246   int y_min = _height;
00247   int x_max = 0;
00248   int y_max = 0;
00249   
00250   //#ifdef SIFTPP_TIMETRACKER
00251   __tt->ping_start(__ttc_imgconv);
00252   //#endif
00253   std::cout << "SiftppClassifier(classify): copy imgdat to SIFTPP Image" << std::endl;
00254 
00255   VL::pixel_t * im_pt = new VL::pixel_t [_width * _height ];
00256   VL::pixel_t * start = im_pt;
00257   for (unsigned int h = 0; h < _height; ++h) {
00258     for (unsigned int w = 0; w < _width ; ++w) {
00259       int i = (_src[h * _width + w] );
00260       VL::pixel_t norm = VL::pixel_t( 255 );
00261       *start++ = VL::pixel_t( i ) / norm;
00262     }
00263   }
00264   // make image
00265   __image = new VL::PgmBuffer();
00266   __image->width  = _width;
00267   __image->height = _height;
00268   __image->data   = im_pt;
00269 
00270   //#ifdef SIFTPP_TIMETRACKER
00271   __tt->ping_end(__ttc_imgconv);
00272   //#endif
00273 
00274   /// Write image to verify correct operation
00275     // nothing yet
00276 
00277   //#ifdef SIFTPP_TIMETRACKER
00278   __tt->ping_start(__ttc_imgfeat);
00279   //#endif
00280 
00281   // COMPUTE IMAGE FEATURES
00282   __img_features.clear();
00283   __img_num_features = 0;
00284   //__img_features.reserve(1000);
00285 
00286   std::cout << "SiftppClassifier(classify): init scalespace" << std::endl;
00287   // initialize scalespace
00288   VL::Sift sift(__image->data, __image->width, __image->height, 
00289                 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ;
00290   
00291   std::cout << "SiftppClassifier(classify): detect image keypoints" << std::endl;
00292   // Run SIFTPP detector
00293   sift.detectKeypoints(__threshold, __edgeThreshold) ;
00294 
00295   // Number of keypoints
00296   __img_num_features = sift.keypointsEnd() - sift.keypointsBegin();
00297   std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image keypoints" << std::endl;
00298 
00299   // set descriptor options
00300   sift.setNormalizeDescriptor( ! __unnormalized ) ;
00301   sift.setMagnification( __magnif ) ;
00302 
00303   std::cout << "SiftppClassifier(classify): run detector, compute ori and des ..." << std::endl;
00304   // Run detector, compute orientations and descriptors
00305   for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ;
00306        iter != sift.keypointsEnd() ; ++iter ) {
00307 
00308     Feature feat; // = new Feature();
00309     
00310     //std::cout << "SiftppClassifier(classify): saving keypoint" << std::endl;
00311     feat.key = (*iter);
00312 
00313     //std::cout << "SiftppClassifier(classify): detect orientations" << std::endl;
00314     // detect orientations
00315     VL::float_t angles [4] ;
00316     int nangles ;
00317     if( ! __noorient ) {
00318       nangles = sift.computeKeypointOrientations(angles, *iter) ;
00319     } else {
00320       nangles = 1;
00321       angles[0] = VL::float_t(0) ;
00322     }
00323     feat.number_of_desc = nangles;
00324     feat.descs = new VL::float_t*[nangles];
00325     
00326     //std::cout << "SiftppClassifier(classify): computing '" << nangles << "' descriptors" << std::endl;
00327     // compute descriptors
00328     for(int a = 0 ; a < nangles ; ++a) {
00329       // compute descriptor
00330       feat.descs[a] = new VL::float_t[__vlen] ;
00331       sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ;
00332     } // next angle
00333     //std::cout << "SiftppClassifier(classify): computed '" << feat.number_of_desc << "' descriptors." << std::endl;
00334 
00335     // save feature
00336     __img_features.push_back( feat );
00337 
00338   } // next keypoint
00339 
00340   // Number of feature
00341   __img_num_features = __img_features.size();
00342 
00343   //#ifdef SIFTPP_TIMETRACKER
00344   __tt->ping_end(__ttc_imgfeat);
00345   //#endif
00346 
00347   std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image features" << std::endl;
00348 
00349   //#ifdef SIFTPP_TIMETRACKER
00350   __tt->ping_start(__ttc_matchin);
00351   //#endif
00352   std::cout << "SiftppClassifier(classify): matching ..." << std::endl;
00353 
00354   std::vector< int > matches(__obj_features.size());
00355   int m = 0;
00356   for (unsigned i = 0; i < __obj_features.size(); i++) {
00357     int match = findMatch(__obj_features[i], __img_features);
00358     matches[i] = match;
00359     if (match != -1) {
00360       std::cout << "SiftppClassifier(classify): Matched feature " << i << " in object image with feature " << match << " in image." << std::endl;
00361       /// adding feature-ROI
00362       ROI r( (int)(__img_features[matches[i]].key.x)-5, (int)(__img_features[matches[i]].key.y )-5, 11, 11, _width, _height);
00363       rv->push_back(r);
00364       // increment feature-match-count
00365       ++m;
00366     }
00367   }
00368 
00369   //#ifdef SIFTPP_TIMETRACKER
00370   __tt->ping_end(__ttc_matchin);
00371   //#endif
00372   std::cout << "SiftppClassifier(classify) matched '" << m << "' of '" << __obj_features.size() << "' features in scene." << std::endl;
00373 
00374   std::cout << "SiftppClassifier(classify): computing ROI" << std::endl;
00375   //#ifdef SIFTPP_TIMETRACKER
00376   __tt->ping_start(__ttc_roimerg);
00377   //#endif
00378   
00379   for (unsigned i = 0; i < matches.size(); i++) {
00380     if (matches[i] != -1) {
00381       if( (int)__img_features[matches[i]].key.x < x_min )
00382         x_min = (int)__img_features[matches[i]].key.x;
00383       if( (int)__img_features[matches[i]].key.y < y_min )
00384         y_min = (int)__img_features[matches[i]].key.y;
00385       if( (int)__img_features[matches[i]].key.x > x_max )
00386         x_max = (int)__img_features[matches[i]].key.x;
00387       if( (int)__img_features[matches[i]].key.y > y_max )
00388         y_max = (int)__img_features[matches[i]].key.y;
00389     }
00390   }
00391   if( m != 0 ) {
00392     ROI r(x_min, y_min, x_max-x_min, y_max-y_min, _width, _height);
00393     rv->push_back(r);
00394   }
00395   
00396   //#ifdef SIFTPP_TIMETRACKER
00397   __tt->ping_end(__ttc_roimerg);
00398   //#endif
00399 
00400   //#ifdef SIFTPP_TIMETRACKER
00401   __tt->ping_end(0);
00402   //#endif
00403 
00404   //#ifdef SIFTPP_TIMETRACKER
00405   // print timetracker statistics
00406   __tt->print_to_stdout();
00407   //#endif
00408 
00409   delete __image;
00410 
00411   std::cout << "SiftppClassifier(classify): done ... returning '" << rv->size() << "' ROIs." << std::endl;
00412   return rv;
00413 }
00414 
00415 int
00416 SiftppClassifier::findMatch(const Feature & ip1, const std::vector< Feature > & ipts) {
00417   double mind = 1e100, second = 1e100;
00418   int match = -1;
00419   
00420   for (unsigned i = 0; i < ipts.size(); i++) {
00421 
00422     if (ipts[i].number_of_desc != ip1.number_of_desc)
00423       continue;
00424     //std::cout << "SiftppClassifier(findMatch): number_of_desc matched!" << std::endl;
00425     for ( int j = 0; j < ip1.number_of_desc; ++j ) {
00426       double d = distSquare(ipts[i].descs[j], ip1.descs[j], __vlen);
00427       
00428       if (d < mind) {
00429         second = mind;
00430         mind = d;
00431         match = i;
00432       } else if (d < second) {
00433         second = d;
00434       }
00435     }
00436   }
00437   
00438   if (mind < 0.5 * second)
00439     return match;
00440   
00441   return -1;
00442 }
00443 
00444 
00445 double
00446 SiftppClassifier::distSquare(VL::float_t *v1, VL::float_t *v2, int n) {
00447   double dsq = 0.;
00448   while (n--) {
00449     dsq += (v1[n-1] - v2[n-1]) * (v1[n-1] - v2[n-1]);
00450   }
00451   //std::cout << "  dsq: '" << dsq << "'" << std::endl;
00452   return dsq;
00453 }
00454 
00455 } // end namespace firevision

Generated on Tue Feb 22 13:32:15 2011 for Fawkes API by  doxygen 1.4.7