#include <boxingtools.h>
Public Member Functions | |
| BoxSVDClassifier (const vector< vector< float > > &data, const unsigned int &classes=4) | |
| ~BoxSVDClassifier () | |
| map< unsigned int, unsigned int > | go () |
Static Public Member Functions | |
| map< unsigned int, unsigned int > | colorMappingByClassSize (const map< unsigned int, unsigned int > &grouping) |
Private Member Functions | |
| map< unsigned int, unsigned int > | randomSeedCluster (const gsl_matrix *const svd_coords, unsigned int matrix_dims) |
| map< unsigned int, unsigned int > | getIterativeCluster (const gsl_matrix *const svd_coords, const map< unsigned int, unsigned int > &current_grouping) |
| bool | setDims (const vector< vector< float > > &data) |
| vector< vector< float > > | getDistances (const gsl_matrix *const svd_coords, const gsl_matrix *const ref_coords) |
| map< unsigned int, unsigned int > | getMapping (const vector< vector< float > > &distances) |
Private Attributes | |
| const vector< vector< float > > & | mData |
| unsigned int | mColumns |
| unsigned int | mRows |
| unsigned int | mClasses |
|
| BoxSVDClassifier::BoxSVDClassifier (const vector< vector< float > > &data, const unsigned int &classes=4) |
|
Definition at line 102 of file boxingtools.cpp. References data, mData, and setDims().
|
| BoxSVDClassifier::~BoxSVDClassifier () |
|
Definition at line 109 of file boxingtools.cpp. 00110 {
00111
00112 }
|
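| map< unsigned int, unsigned int > BoxSVDClassifier::colorMappingByClassSize (const map< unsigned int, unsigned int > &grouping) [static] |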
|
Definition at line 416 of file boxingtools.cpp. Referenced by EMAN::BoxingTools::classify(). 00417 {
00418
00419 vector<unsigned int> current_mappings;
00420 // Get the extent of the current mappings
00421 for (map< unsigned int, unsigned int>::const_iterator it = grouping.begin(); it != grouping.end(); ++it )
00422 {
00423 if ( find( current_mappings.begin(), current_mappings.end(), it->second ) == current_mappings.end() )
00424 {
00425 current_mappings.push_back( it->second );
00426 }
00427 }
00428
00429 if ( current_mappings.size() < 2 )
00430 {
00431 cerr << "Error, cannot call colMappingByClassSize when less than 2 classes have been specified, I think you created " << current_mappings.size() << " classes " << endl;
00432 throw;
00433 }
00434
00435 // Record how many data points are in each class.
00436 map<unsigned int, unsigned int> mappings_tally;
00437 for( vector<unsigned int>::const_iterator it = current_mappings.begin(); it != current_mappings.end(); ++it )
00438 {
00439 // First initialize each total to zero
00440 mappings_tally[*it] = 0;
00441 }
00442
00443 // Now do the actual counting
00444 for (map< unsigned int, unsigned int>::const_iterator it = grouping.begin(); it != grouping.end(); ++it )
00445 {
00446 mappings_tally[it->second] += 1;
00447 }
00448
00449 // find the largest tally
00450 unsigned int current_mapping_idx = 0;
00451 map< unsigned int, unsigned int> return_map;
00452 while ( mappings_tally.size() > 0 )
00453 {
00454 #if SVD_CLASSIFIER_DEBUG
00455 cout << "Printing mappings_tally" << endl;
00456 print_map(mappings_tally);
00457 #endif
00458
00459 map< unsigned int, unsigned int>::iterator it = mappings_tally.begin();
00460 map< unsigned int, unsigned int>::iterator track_it = mappings_tally.begin();
00461 unsigned int current_max = it->second;
00462 unsigned int current_idx = it->first;
00463 ++it;
00464 for (; it != mappings_tally.end(); ++it )
00465 {
00466 if ( it->second > current_max )
00467 {
00468 current_max = it->second;
00469 current_idx = it->first;
00470 track_it = it;
00471 }
00472 }
00473
00474 #if SVD_CLASSIFIER_DEBUG
00475 cout << "The mapping is " << current_idx << " to " << current_mapping_idx << endl;
00476 #endif
00477 for (map< unsigned int, unsigned int>::const_iterator group_it = grouping.begin(); group_it != grouping.end(); ++group_it )
00478 {
00479 if ( group_it->second == current_idx )
00480 {
00481 return_map[group_it->first] = current_mapping_idx;
00482 }
00483 }
00484
00485 mappings_tally.erase( current_idx );
00486
00487 current_mapping_idx++;
00488 }
00489
00490
00491 #if SVD_CLASSIFIER_DEBUG
00492 cout << "Printing adjusted classification map" << endl;
00493 print_map(return_map);
00494 #endif
00495
00496
00497 return return_map;
00498 }
|
|
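| vector< vector< float > > BoxSVDClassifier::getDistances (const gsl_matrix *const svd_coords, const gsl_matrix *const ref_coords) [private] |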
|
Definition at line 366 of file boxingtools.cpp. Referenced by getIterativeCluster(), and randomSeedCluster(). 00367 {
00368 // accrue the distance data - this could be done more concisely, but there shouldn't be much cost
00369 // because the data should be fairly small. By more concisely I mean, the distance data would not need
00370 // to be stored, it could be determined without storing it in distances.
00371 vector<vector<float> > distances;
00372 for (unsigned int i = 0; i < mColumns; ++i )
00373 {
00374 vector<float> ith_distances;
00375 for( unsigned int random_seed_idx = 0; random_seed_idx < mClasses; ++random_seed_idx )
00376 {
00377 float distance = 0;
00378 for (unsigned int j = 0; j < mColumns; ++j )
00379 {
00380 float value = (float)( (gsl_matrix_get( ref_coords, random_seed_idx, j) - gsl_matrix_get( svd_coords, i, j)) );
00381 distance += value * value;
00382 }
00383 ith_distances.push_back(distance);
00384 }
00385 distances.push_back(ith_distances);
00386 }
00387
00388 return distances;
00389 }
|
|
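| map< unsigned int, unsigned int > BoxSVDClassifier::getIterativeCluster (const gsl_matrix *const svd_coords, const map< unsigned int, unsigned int > &current_grouping) [private] |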
|
Definition at line 243 of file boxingtools.cpp. References getDistances(), getMapping(), mClasses, and mColumns. Referenced by go(). 00244 {
00245 // Space to store the reference vectors
00246 gsl_matrix * ref_coords = gsl_matrix_calloc( mClasses, mColumns );
00247
00248 // Assumes there are a total of mClasses in the current_groupings mapping
00249 for(unsigned int i = 0; i < mClasses; ++i)
00250 {
00251 unsigned int tally = 0;
00252 for (map< unsigned int, unsigned int>::const_iterator it = current_grouping.begin(); it != current_grouping.end(); ++it )
00253 {
00254 if ( it->second == i )
00255 {
00256 for( unsigned int j = 0; j < mColumns; ++j )
00257 {
00258 gsl_matrix_set(ref_coords, i, j, gsl_matrix_get( svd_coords, it->first, j ) + gsl_matrix_get( ref_coords, i, j));
00259 }
00260 ++tally;
00261 }
00262
00263 }
00264 // then normalize the addition
00265 if (tally != 0)
00266 for( unsigned int j = 0; j < mColumns; ++j )
00267 {
00268 gsl_matrix_set(ref_coords, i, j, gsl_matrix_get( ref_coords, i, j )/((float) tally));
00269 }
00270 }
00271
00272 vector<vector<float> > distances = getDistances(svd_coords, ref_coords);
00273
00274 #if SVD_CLASSIFIER_DEBUG
00275 cout << "The distance matrix is " << endl;
00276 for( unsigned int i = 0; i < distances.size(); ++i )
00277 {
00278 for( unsigned int j = 0; j < distances[i].size(); ++j )
00279 {
00280 cout << distances[i][j] << " ";
00281 }
00282 cout << endl;
00283 }
00284 #endif
00285
00286
00287 // Finally decide which of the randomly chosen vectors is closest to each of the input vectors
00288 // and use that as the basis of the grouping
00289 map< unsigned int, unsigned int> return_map = getMapping(distances);
00290
00291 #if SVD_CLASSIFIER_DEBUG
00292 cout << "Printing classification map" << endl;
00293 print_map(return_map);
00294 #endif
00295
00296 gsl_matrix_free(ref_coords);
00297
00298 return return_map;
00299 }
|
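| map< unsigned int, unsigned int > BoxSVDClassifier::getMapping (const vector< vector< float > > &distances) [private] |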
|
Definition at line 391 of file boxingtools.cpp. References min. Referenced by getIterativeCluster(), and randomSeedCluster(). 00392 {
00393 // Finally decide which of the randomly chosen vectors is closest to each of the input vectors
00394 // and use that as the basis of the grouping
00395 map< unsigned int, unsigned int> return_map;
00396 unsigned int vector_idx = 0;
00397 for( vector<vector<float> >::const_iterator it = distances.begin(); it != distances.end(); ++it, ++vector_idx )
00398 {
00399 vector<float>::const_iterator mIt = it->begin();
00400 float min = *mIt;
00401 unsigned int min_idx = 0;
00402 for ( unsigned int current_idx = 0; mIt != it->end(); ++mIt, ++current_idx )
00403 {
00404 if ( *mIt < min )
00405 {
00406 min = *mIt;
00407 min_idx = current_idx;
00408 }
00409 }
00410 return_map[vector_idx] = min_idx;
00411 }
00412
00413 return return_map;
00414 }
|
| map< unsigned int, unsigned int > BoxSVDClassifier::go () |
|
Definition at line 134 of file boxingtools.cpp. References getIterativeCluster(), mColumns, mData, mRows, norm(), randomSeedCluster(), and V. Referenced by EMAN::BoxingTools::classify(). 00135 {
00136 // This is done in the constructor
00137 // setDims(mData);
00138
00139
00140 unsigned int local_columns = mColumns;
00141 if ( mRows < mColumns )
00142 {
00143 // cerr << "Warning: gsl SVD works only when m > n, you have m = " << mRows << " and n = " << mColumns << endl;
00144 // This local adaptation means things will proceed the same way even if there are more columns in A than rows
00145 // Every input data is still classified, just the SVD eigenvectors are found using a subset of all the data
00146 local_columns = mRows;
00147 }
00148
00149 gsl_matrix * U = gsl_matrix_calloc( mRows, local_columns );
00150 gsl_matrix * A = gsl_matrix_calloc( mRows, mColumns );
00151 for ( unsigned int i = 0; i < mRows; ++i )
00152 {
00153 for ( unsigned int j = 0; j < mColumns; ++j )
00154 {
00155 gsl_matrix_set( A, i, j, mData[j][i] );
00156 if ( j < local_columns )
00157 gsl_matrix_set( U, i, j, mData[j][i] );
00158 }
00159 }
00160 #if SVD_CLASSIFIER_DEBUG
00161 printMatrix( A, mRows, mColumns, "A" );
00162 #endif
00163
00164 gsl_matrix * V = gsl_matrix_calloc( local_columns, local_columns );
00165 gsl_vector * S = gsl_vector_calloc( local_columns );
00166 gsl_vector * work = gsl_vector_calloc( local_columns );
00167
00168 if ( gsl_linalg_SV_decomp (U, V, S, work) )
00169 {
00170 cerr << "ERROR: gsl returned a non zero value on application of the SVD" << endl;
00171 }
00172
00173 #if SVD_CLASSIFIER_DEBUG
00174 printMatrix( U, mRows, local_columns, "U" );
00175 printVector( S, local_columns, "S" );
00176 printMatrix( V, local_columns, local_columns, "V");
00177 #endif
00178
00179 // normalize the columns of matrix A
00180 for ( unsigned int j = 0; j < mColumns; ++j )
00181 {
00182 float norm = 0;
00183 for ( unsigned int i = 0; i < mRows; ++i )
00184 {
00185 norm += (float)(gsl_matrix_get( A, i, j)*gsl_matrix_get( A, i, j));
00186 }
00187 norm = sqrtf(norm);
00188 for ( unsigned int i = 0; i < mRows; ++i )
00189 {
00190 gsl_matrix_set( A, i, j, gsl_matrix_get(A,i,j)/norm);
00191 }
00192 }
00193
00194 #if SVD_CLASSIFIER_DEBUG
00195 for ( unsigned int j = 0; j < mColumns; ++j )
00196 {
00197 double norm = 0;
00198 for ( unsigned int i = 0; i < mRows; ++i )
00199 {
00200 norm += gsl_matrix_get( A, i, j)*gsl_matrix_get( A, i, j);
00201 }
00202 cout << "For column " << j << " the squared norm is " << norm << endl;
00203 }
00204 #endif
00205
00206
00207 gsl_matrix * svd_coords = gsl_matrix_calloc( mColumns, mColumns );
00208 // Correlate the columns of A with the columns of U and store the information in a matrix called svd_coords
00209 for ( unsigned int i = 0; i < mColumns; ++i )
00210 {
00211 for ( unsigned int j = 0; j < local_columns; ++j )
00212 {
00213 double result = 0.0;
00214 for ( unsigned int k = 0; k < mRows; ++k )
00215 {
00216 result += gsl_matrix_get(A,k,i)*gsl_matrix_get(U,k,j);
00217 }
00218 gsl_matrix_set( svd_coords, i, j, result);
00219 }
00220 }
00221
00222 #if SVD_CLASSIFIER_DEBUG
00223 printMatrix( svd_coords, mColumns, mColumns, "svd_coords" );
00224 #endif
00225
00226 map< unsigned int, unsigned int> grouping = randomSeedCluster(svd_coords, mColumns);
00227
00228 for ( unsigned int i = 0; i < 20; ++ i )
00229 {
00230 grouping = getIterativeCluster(svd_coords, grouping);
00231 }
00232
00233 gsl_matrix_free(A);
00234 gsl_matrix_free(U);
00235 gsl_matrix_free(V);
00236 gsl_vector_free(S);
00237 gsl_vector_free(work);
00238 gsl_matrix_free(svd_coords);
00239
00240 return grouping;
00241 }
|
|
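| map< unsigned int, unsigned int > BoxSVDClassifier::randomSeedCluster (const gsl_matrix *const svd_coords, unsigned int matrix_dims) [private] |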
|
Definition at line 302 of file boxingtools.cpp. References getDistances(), getMapping(), mClasses, and mColumns. Referenced by go(). 00303 {
00304 // Seed the random number generator
00305 srand(static_cast<unsigned int>(time(0)));
00306
00307 vector<unsigned int> random_seed_indices;
00308 while ( random_seed_indices.size() < mClasses )
00309 {
00310 unsigned int random_idx = static_cast<int>(((float)rand()/RAND_MAX)*matrix_dims);
00311 if ( find( random_seed_indices.begin(), random_seed_indices.end(), random_idx ) == random_seed_indices.end() )
00312 {
00313 random_seed_indices.push_back( random_idx );
00314 }
00315 }
00316
00317 // Space to store the reference vectors
00318 gsl_matrix * ref_coords = gsl_matrix_calloc( mClasses, mColumns );
00319
00320 // Put the reference vectors into a matrix to make the approach transparent to the reader
00321 for(unsigned int i = 0; i < random_seed_indices.size(); ++i)
00322 {
00323 for( unsigned int j = 0; j < matrix_dims; ++j )
00324 {
00325 gsl_matrix_set(ref_coords, i, j, gsl_matrix_get( svd_coords, random_seed_indices[i], j ));
00326 }
00327 }
00328
00329 #if SVD_CLASSIFIER_DEBUG
00330 printMatrix( ref_coords, mClasses, matrix_dims, "Reference matrix in first grouping");
00331 #endif
00332
00333 // accrue the distance data - this could be done more concisely, but there shouldn't be much cost
00334 // because the data should be fairly small. By more concisely I mean, the distance data would not need
00335 // to be stored, it could be determined without storing it in distances.
00336 vector<vector<float> > distances = getDistances(svd_coords, ref_coords);
00337
00338 #if SVD_CLASSIFIER_DEBUG
00339 cout << "The distance matrix is " << endl;
00340 for( unsigned int i = 0; i < distances.size(); ++i )
00341 {
00342 for( unsigned int j = 0; j < distances[i].size(); ++j )
00343 {
00344 cout << distances[i][j] << " ";
00345 }
00346 cout << endl;
00347 }
00348 #endif
00349
00350
00351 // Finally decide which of the randomly chosen vectors is closest to each of the input vectors
00352 // and use that as the basis of the grouping
00353 map< unsigned int, unsigned int> return_map = getMapping(distances);
00354
00355 #if SVD_CLASSIFIER_DEBUG
00356 cout << "Printing classification map, randomly seeded" << endl;
00357 print_map(return_map);
00358 #endif
00359
00360 gsl_matrix_free(ref_coords);
00361
00362 return return_map;
00363 }
|
| bool BoxSVDClassifier::setDims (const vector< vector< float > > &data) [private] |
|
Definition at line 114 of file boxingtools.cpp. References data, mColumns, mData, and mRows. Referenced by BoxSVDClassifier(). 00115 {
00116 mColumns = mData.size();
00117 vector<vector<float> >::const_iterator it = data.begin();
00118 mRows = it->size();
00119 it++;
00120 for( ; it != data.end(); ++it )
00121 {
00122 if ( it->size() != mRows )
00123 {
00124 cerr << "ERROR: can not initial the BoxSVDClassifier with vectors of un-equal lengths " << endl;
00125 cerr << "The vector lengths that did not agree were " << mRows << " and " << it->size() << endl;
00126 return false;
00127 }
00128 }
00129
00130 return true;
00131 }
|
| unsigned int BoxSVDClassifier::mClasses [private] |
|
Definition at line 139 of file boxingtools.h. Referenced by getIterativeCluster(), and randomSeedCluster(). |
| unsigned int BoxSVDClassifier::mColumns [private] |
|
Definition at line 136 of file boxingtools.h. Referenced by getIterativeCluster(), go(), randomSeedCluster(), and setDims(). |
| const vector< vector< float > > & BoxSVDClassifier::mData [private] |
|
Definition at line 134 of file boxingtools.h. Referenced by BoxSVDClassifier(), go(), and setDims(). |
| unsigned int BoxSVDClassifier::mRows [private] |
|
Definition at line 137 of file boxingtools.h. |
1.3.9.1