Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

EMAN::BoxSVDClassifier Class Reference

#include <boxingtools.h>

List of all members.

Public Member Functions

 BoxSVDClassifier (const vector< vector< float > > &data, const unsigned int &classes=4)
 ~BoxSVDClassifier ()
map< unsigned int, unsigned int > go ()

Static Public Member Functions

map< unsigned int, unsigned int > colorMappingByClassSize (const map< unsigned int, unsigned int > &grouping)

Private Member Functions

map< unsigned int, unsigned int > randomSeedCluster (const gsl_matrix *const svd_coords, unsigned int matrix_dims)
map< unsigned int, unsigned int > getIterativeCluster (const gsl_matrix *const svd_coords, const map< unsigned int, unsigned int > &current_grouping)
bool setDims (const vector< vector< float > > &data)
vector< vector< float > > getDistances (const gsl_matrix *const svd_coords, const gsl_matrix *const ref_coords)
map< unsigned int, unsigned int > getMapping (const vector< vector< float > > &distances)

Private Attributes

const vector< vector< float > > & mData
unsigned int mColumns
unsigned int mRows
unsigned int mClasses


Constructor & Destructor Documentation

BoxSVDClassifier::BoxSVDClassifier (const vector< vector< float > > & data, const unsigned int & classes = 4)
 

Definition at line 102 of file boxingtools.cpp.

References data, mData, and setDims().

00102                                                                                                   :
00103                 mData(data), mClasses(classes)
00104 {
00105         setDims( mData );
00106 }

BoxSVDClassifier::~BoxSVDClassifier ()
 

Definition at line 109 of file boxingtools.cpp.

00110 {
00111 
00112 }


Member Function Documentation

map< unsigned int, unsigned int > BoxSVDClassifier::colorMappingByClassSize (const map< unsigned int, unsigned int > & grouping) [static]
 

Definition at line 416 of file boxingtools.cpp.

Referenced by EMAN::BoxingTools::classify().

00417 {
00418 
00419         vector<unsigned int> current_mappings;
00420         // Get the extent of the current mappings
00421         for (map< unsigned int, unsigned int>::const_iterator it = grouping.begin(); it != grouping.end(); ++it )
00422         {
00423                 if ( find( current_mappings.begin(), current_mappings.end(), it->second ) == current_mappings.end() )
00424                 {
00425                         current_mappings.push_back( it->second );
00426                 }
00427         }
00428 
00429         if ( current_mappings.size() < 2 )
00430         {
00431                 cerr << "Error, cannot call colMappingByClassSize when less than 2 classes have been specified, I think you created " << current_mappings.size() << " classes " << endl;
00432                 throw;
00433         }
00434 
00435         // Record how many data points are in each class.
00436         map<unsigned int, unsigned int> mappings_tally;
00437         for( vector<unsigned int>::const_iterator it = current_mappings.begin(); it != current_mappings.end(); ++it )
00438         {
00439                 // First initialize each total to zero
00440                 mappings_tally[*it] = 0;
00441         }
00442 
00443         // Now do the actual counting
00444         for (map< unsigned int, unsigned int>::const_iterator it = grouping.begin(); it != grouping.end(); ++it )
00445         {
00446                 mappings_tally[it->second] += 1;
00447         }
00448 
00449         // find the largest tally
00450         unsigned int current_mapping_idx = 0;
00451         map< unsigned int, unsigned int> return_map;
00452         while ( mappings_tally.size() > 0 )
00453         {
00454 #if SVD_CLASSIFIER_DEBUG
00455                 cout << "Printing mappings_tally" << endl;
00456                 print_map(mappings_tally);
00457 #endif
00458 
00459                 map< unsigned int, unsigned int>::iterator it = mappings_tally.begin();
00460                 map< unsigned int, unsigned int>::iterator track_it = mappings_tally.begin();
00461                 unsigned int current_max = it->second;
00462                 unsigned int current_idx = it->first;
00463                 ++it;
00464                 for (; it != mappings_tally.end(); ++it )
00465                 {
00466                         if ( it->second > current_max )
00467                         {
00468                                 current_max = it->second;
00469                                 current_idx = it->first;
00470                                 track_it = it;
00471                         }
00472                 }
00473 
00474 #if SVD_CLASSIFIER_DEBUG
00475                 cout << "The mapping is " << current_idx << " to " << current_mapping_idx << endl;
00476 #endif
00477                 for (map< unsigned int, unsigned int>::const_iterator group_it = grouping.begin(); group_it != grouping.end(); ++group_it )
00478                 {
00479                         if ( group_it->second == current_idx )
00480                         {
00481                                 return_map[group_it->first] = current_mapping_idx;
00482                         }
00483                 }
00484 
00485                 mappings_tally.erase( current_idx );
00486 
00487                 current_mapping_idx++;
00488         }
00489 
00490 
00491 #if SVD_CLASSIFIER_DEBUG
00492         cout << "Printing adjusted classification map" << endl;
00493         print_map(return_map);
00494 #endif
00495 
00496 
00497         return return_map;
00498 }

vector< vector< float > > BoxSVDClassifier::getDistances (const gsl_matrix *const svd_coords, const gsl_matrix *const ref_coords) [private]
 

Definition at line 366 of file boxingtools.cpp.

Referenced by getIterativeCluster(), and randomSeedCluster().

00367 {
00368         // accrue the distance data - this could be done more concisely, but there shouldn't be much cost
00369         // because the data should be fairl small. By more concisely I mean, the distance data would not need
00370         // to be stored, it could be determined without storing it in distances.
00371         vector<vector<float> > distances;
00372         for (unsigned int i = 0; i < mColumns; ++i )
00373         {
00374                 vector<float> ith_distances;
00375                 for( unsigned int random_seed_idx = 0; random_seed_idx < mClasses; ++random_seed_idx )
00376                 {
00377                         float distance = 0;
00378                         for (unsigned int j = 0; j < mColumns; ++j )
00379                         {
00380                                 float value = (float)( (gsl_matrix_get( ref_coords, random_seed_idx, j) - gsl_matrix_get( svd_coords, i, j)) );
00381                                 distance += value * value;
00382                         }
00383                         ith_distances.push_back(distance);
00384                 }
00385                 distances.push_back(ith_distances);
00386         }
00387 
00388         return distances;
00389 }

map< unsigned int, unsigned int > BoxSVDClassifier::getIterativeCluster (const gsl_matrix *const svd_coords, const map< unsigned int, unsigned int > & current_grouping) [private]
 

Definition at line 243 of file boxingtools.cpp.

References getDistances(), getMapping(), mClasses, and mColumns.

Referenced by go().

00244 {
00245         // Space to store the reference vectors
00246         gsl_matrix * ref_coords = gsl_matrix_calloc( mClasses, mColumns );
00247 
00248         // Assumes there are a total of mClasses in the current_groupings mapping
00249         for(unsigned int i = 0; i < mClasses; ++i)
00250         {
00251                 unsigned int tally = 0;
00252                 for (map< unsigned int, unsigned int>::const_iterator it = current_grouping.begin(); it != current_grouping.end(); ++it )
00253                 {
00254                         if ( it->second == i )
00255                         {
00256                                 for( unsigned int j = 0; j < mColumns; ++j )
00257                                 {
00258                                         gsl_matrix_set(ref_coords, i, j, gsl_matrix_get( svd_coords, it->first, j ) + gsl_matrix_get( ref_coords, i, j));
00259                                 }
00260                                 ++tally;
00261                         }
00262 
00263                 }
00264                 // then normalize the the addition
00265                 if (tally != 0)
00266                         for( unsigned int j = 0; j < mColumns; ++j )
00267                 {
00268                         gsl_matrix_set(ref_coords, i, j, gsl_matrix_get( ref_coords, i, j )/((float) tally));
00269                 }
00270         }
00271 
00272         vector<vector<float> > distances = getDistances(svd_coords, ref_coords);
00273 
00274 #if SVD_CLASSIFIER_DEBUG
00275         cout << "The distance matrix is " << endl;
00276         for( unsigned int i = 0; i < distances.size(); ++i )
00277         {
00278                 for( unsigned int j = 0; j < distances[i].size(); ++j )
00279                 {
00280                         cout << distances[i][j] << " ";
00281                 }
00282                 cout << endl;
00283         }
00284 #endif
00285 
00286 
00287         // Finally decide which of the randomly chosen vectors is closest to each of the input vectors
00288         // and use that as the basis of the grouping
00289         map< unsigned int, unsigned int> return_map = getMapping(distances);
00290 
00291 #if SVD_CLASSIFIER_DEBUG
00292         cout << "Printing classification map" << endl;
00293         print_map(return_map);
00294 #endif
00295 
00296         gsl_matrix_free(ref_coords);
00297 
00298         return return_map;
00299 }

map< unsigned int, unsigned int > BoxSVDClassifier::getMapping (const vector< vector< float > > & distances) [private]
 

Definition at line 391 of file boxingtools.cpp.

References min.

Referenced by getIterativeCluster(), and randomSeedCluster().

00392 {
00393         // Finally decide which of the randomly chosen vectors is closest to each of the input vectors
00394         // and use that as the basis of the grouping
00395         map< unsigned int, unsigned int> return_map;
00396         unsigned int vector_idx = 0;
00397         for( vector<vector<float> >::const_iterator it = distances.begin(); it != distances.end(); ++it, ++vector_idx )
00398         {
00399                 vector<float>::const_iterator mIt = it->begin();
00400                 float min = *mIt;
00401                 unsigned int min_idx = 0;
00402                 for ( unsigned int current_idx = 0; mIt != it->end(); ++mIt, ++current_idx )
00403                 {
00404                         if ( *mIt < min )
00405                         {
00406                                 min = *mIt;
00407                                 min_idx = current_idx;
00408                         }
00409                 }
00410                 return_map[vector_idx] = min_idx;
00411         }
00412 
00413         return return_map;
00414 }

map< unsigned int, unsigned int > BoxSVDClassifier::go ()
 

Definition at line 134 of file boxingtools.cpp.

References getIterativeCluster(), mColumns, mData, mRows, norm(), randomSeedCluster(), and V.

Referenced by EMAN::BoxingTools::classify().

00135 {
00136         //      This is done in the constructor
00137         //      setDims(mData);
00138 
00139 
00140         unsigned int local_columns = mColumns;
00141         if ( mRows < mColumns )
00142         {
00143 //              cerr << "Warning: gsl SVD works only when m > n, you have m = " << mRows << " and n = " << mColumns << endl;
00144                 // This local adaptation means things will proceed the same way even if there are more columns in A then rows
00145                 // Every input data is still classified, just the SVD eigenvectors are found using a subset of all the data
00146                 local_columns = mRows;
00147         }
00148 
00149         gsl_matrix * U = gsl_matrix_calloc( mRows, local_columns );
00150         gsl_matrix * A = gsl_matrix_calloc( mRows, mColumns );
00151         for ( unsigned int i = 0; i < mRows; ++i )
00152         {
00153                 for ( unsigned int j = 0; j < mColumns; ++j )
00154                 {
00155                         gsl_matrix_set( A, i, j, mData[j][i] );
00156                         if ( j < local_columns )
00157                                 gsl_matrix_set( U, i, j, mData[j][i] );
00158                 }
00159         }
00160 #if SVD_CLASSIFIER_DEBUG
00161         printMatrix( A, mRows, mColumns, "A" );
00162 #endif
00163 
00164         gsl_matrix * V = gsl_matrix_calloc( local_columns, local_columns );
00165         gsl_vector * S = gsl_vector_calloc( local_columns );
00166         gsl_vector * work = gsl_vector_calloc( local_columns );
00167 
00168         if ( gsl_linalg_SV_decomp (U, V, S, work) )
00169         {
00170                 cerr << "ERROR: gsl returned a non zero value on application of the SVD" << endl;
00171         }
00172 
00173 #if SVD_CLASSIFIER_DEBUG
00174         printMatrix( U, mRows, local_columns, "U" );
00175         printVector( S, local_columns, "S" );
00176         printMatrix( V, local_columns, local_columns, "V");
00177 #endif
00178 
00179         // normalize the columns of matrix A
00180         for ( unsigned int j = 0; j < mColumns; ++j )
00181         {
00182                 float norm = 0;
00183                 for ( unsigned int i = 0; i < mRows; ++i )
00184                 {
00185                         norm += (float)(gsl_matrix_get( A, i, j)*gsl_matrix_get( A, i, j));
00186                 }
00187                 norm = sqrtf(norm);
00188                 for ( unsigned int i = 0; i < mRows; ++i )
00189                 {
00190                         gsl_matrix_set( A, i, j, gsl_matrix_get(A,i,j)/norm);
00191                 }
00192         }
00193 
00194 #if SVD_CLASSIFIER_DEBUG
00195         for ( unsigned int j = 0; j < mColumns; ++j )
00196         {
00197                 double norm = 0;
00198                 for ( unsigned int i = 0; i < mRows; ++i )
00199                 {
00200                         norm += gsl_matrix_get( A, i, j)*gsl_matrix_get( A, i, j);
00201                 }
00202                 cout << "For column " << j << " the squared norm is " << norm << endl;
00203         }
00204 #endif
00205 
00206 
00207         gsl_matrix * svd_coords = gsl_matrix_calloc( mColumns, mColumns );
00208         // Correlate the columns of A with the columns of U and store the information in a martrix called svd_coords
00209         for ( unsigned int i = 0; i < mColumns; ++i )
00210         {
00211                 for ( unsigned int j = 0; j < local_columns; ++j )
00212                 {
00213                         double result = 0.0;
00214                         for ( unsigned int k = 0; k < mRows; ++k )
00215                         {
00216                                 result += gsl_matrix_get(A,k,i)*gsl_matrix_get(U,k,j);
00217                         }
00218                         gsl_matrix_set( svd_coords, i, j, result);
00219                 }
00220         }
00221 
00222 #if SVD_CLASSIFIER_DEBUG
00223         printMatrix( svd_coords, mColumns, mColumns, "svd_coords" );
00224 #endif
00225 
00226         map< unsigned int, unsigned int> grouping = randomSeedCluster(svd_coords, mColumns);
00227 
00228         for ( unsigned int i = 0; i < 20; ++ i )
00229         {
00230                 grouping = getIterativeCluster(svd_coords, grouping);
00231         }
00232 
00233         gsl_matrix_free(A);
00234         gsl_matrix_free(U);
00235         gsl_matrix_free(V);
00236         gsl_vector_free(S);
00237         gsl_vector_free(work);
00238         gsl_matrix_free(svd_coords);
00239 
00240         return grouping;
00241 }

map< unsigned int, unsigned int > BoxSVDClassifier::randomSeedCluster (const gsl_matrix *const svd_coords, unsigned int matrix_dims) [private]
 

Definition at line 302 of file boxingtools.cpp.

References getDistances(), getMapping(), mClasses, and mColumns.

Referenced by go().

00303 {
00304         // Seed the random number generator
00305         srand(static_cast<unsigned int>(time(0)));
00306 
00307         vector<unsigned int> random_seed_indices;
00308         while ( random_seed_indices.size() < mClasses )
00309         {
00310                 unsigned int random_idx = static_cast<int>(((float)rand()/RAND_MAX)*matrix_dims);
00311                 if ( find( random_seed_indices.begin(), random_seed_indices.end(), random_idx ) == random_seed_indices.end() )
00312                 {
00313                         random_seed_indices.push_back( random_idx );
00314                 }
00315         }
00316 
00317         // Space to store the reference vectors
00318         gsl_matrix * ref_coords = gsl_matrix_calloc( mClasses, mColumns );
00319 
00320         // Put the reference vectors into a matrix to make the approach transparent to the reader
00321         for(unsigned int i = 0; i < random_seed_indices.size(); ++i)
00322         {
00323                 for( unsigned int j = 0; j < matrix_dims; ++j )
00324                 {
00325                         gsl_matrix_set(ref_coords, i, j, gsl_matrix_get( svd_coords, random_seed_indices[i], j ));
00326                 }
00327         }
00328 
00329 #if SVD_CLASSIFIER_DEBUG
00330         printMatrix( ref_coords, mClasses, matrix_dims, "Reference matrix in first grouping");
00331 #endif
00332 
00333         // accrue the distance data - this could be done more concisely, but there shouldn't be much cost
00334         // because the data should be fairl small. By more concisely I mean, the distance data would not need
00335         // to be stored, it could be determined without storing it in distances.
00336         vector<vector<float> > distances = getDistances(svd_coords, ref_coords);
00337 
00338 #if SVD_CLASSIFIER_DEBUG
00339         cout << "The distance matrix is " << endl;
00340         for( unsigned int i = 0; i < distances.size(); ++i )
00341         {
00342                 for( unsigned int j = 0; j < distances[i].size(); ++j )
00343                 {
00344                         cout << distances[i][j] << " ";
00345                 }
00346                 cout << endl;
00347         }
00348 #endif
00349 
00350 
00351         // Finally decide which of the randomly chosen vectors is closest to each of the input vectors
00352         // and use that as the basis of the grouping
00353         map< unsigned int, unsigned int> return_map = getMapping(distances);
00354 
00355 #if SVD_CLASSIFIER_DEBUG
00356         cout << "Printing classification map, randomly seeded" << endl;
00357         print_map(return_map);
00358 #endif
00359 
00360         gsl_matrix_free(ref_coords);
00361 
00362         return return_map;
00363 }

bool BoxSVDClassifier::setDims (const vector< vector< float > > & data) [private]
 

Definition at line 114 of file boxingtools.cpp.

References data, mColumns, mData, and mRows.

Referenced by BoxSVDClassifier().

00115 {
00116         mColumns = mData.size();
00117         vector<vector<float> >::const_iterator it = data.begin();
00118         mRows = it->size();
00119         it++;
00120         for( ; it != data.end(); ++it )
00121         {
00122                 if ( it->size() != mRows )
00123                 {
00124                         cerr << "ERROR: can not initial the BoxSVDClassifier with vectors of un-equal lengths " << endl;
00125                         cerr << "The vector lengths that did not agree were " <<  mRows << " and " << it->size() << endl;
00126                         return false;
00127                 }
00128         }
00129 
00130         return true;
00131 }


Member Data Documentation

unsigned int EMAN::BoxSVDClassifier::mClasses [private]
 

Definition at line 139 of file boxingtools.h.

Referenced by getIterativeCluster(), and randomSeedCluster().

unsigned int EMAN::BoxSVDClassifier::mColumns [private]
 

Definition at line 136 of file boxingtools.h.

Referenced by getIterativeCluster(), go(), randomSeedCluster(), and setDims().

const vector<vector<float> >& EMAN::BoxSVDClassifier::mData [private]
 

Definition at line 134 of file boxingtools.h.

Referenced by BoxSVDClassifier(), go(), and setDims().

unsigned int EMAN::BoxSVDClassifier::mRows [private]
 

Definition at line 137 of file boxingtools.h.

Referenced by go(), and setDims().


The documentation for this class was generated from the following files:
- boxingtools.h
- boxingtools.cpp
Generated on Tue Jun 11 13:47:55 2013 for EMAN2 by  doxygen 1.3.9.1