old_ml_data.cpp 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // Intel License Agreement
  11. //
  12. // Copyright (C) 2000, Intel Corporation, all rights reserved.
  13. // Third party copyrights are property of their respective owners.
  14. //
  15. // Redistribution and use in source and binary forms, with or without modification,
  16. // are permitted provided that the following conditions are met:
  17. //
  18. // * Redistribution's of source code must retain the above copyright notice,
  19. // this list of conditions and the following disclaimer.
  20. //
  21. // * Redistribution's in binary form must reproduce the above copyright notice,
  22. // this list of conditions and the following disclaimer in the documentation
  23. // and/or other materials provided with the distribution.
  24. //
  25. // * The name of Intel Corporation may not be used to endorse or promote products
  26. // derived from this software without specific prior written permission.
  27. //
  28. // This software is provided by the copyright holders and contributors "as is" and
  29. // any express or implied warranties, including, but not limited to, the implied
  30. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  31. // In no event shall the Intel Corporation or contributors be liable for any direct,
  32. // indirect, incidental, special, exemplary, or consequential damages
  33. // (including, but not limited to, procurement of substitute goods or services;
  34. // loss of use, data, or profits; or business interruption) however caused
  35. // and on any theory of liability, whether in contract, strict liability,
  36. // or tort (including negligence or otherwise) arising in any way out of
  37. // the use of this software, even if advised of the possibility of such damage.
  38. //
  39. //M*/
  40. #include "old_ml_precomp.hpp"
  41. #include <ctype.h>
  42. #define MISS_VAL FLT_MAX
  43. #define CV_VAR_MISS 0
  44. CvTrainTestSplit::CvTrainTestSplit()
  45. {
  46. train_sample_part_mode = CV_COUNT;
  47. train_sample_part.count = -1;
  48. mix = false;
  49. }
  50. CvTrainTestSplit::CvTrainTestSplit( int _train_sample_count, bool _mix )
  51. {
  52. train_sample_part_mode = CV_COUNT;
  53. train_sample_part.count = _train_sample_count;
  54. mix = _mix;
  55. }
  56. CvTrainTestSplit::CvTrainTestSplit( float _train_sample_portion, bool _mix )
  57. {
  58. train_sample_part_mode = CV_PORTION;
  59. train_sample_part.portion = _train_sample_portion;
  60. mix = _mix;
  61. }
  62. ////////////////
  63. CvMLData::CvMLData()
  64. {
  65. values = missing = var_types = var_idx_mask = response_out = var_idx_out = var_types_out = 0;
  66. train_sample_idx = test_sample_idx = 0;
  67. header_lines_number = 0;
  68. sample_idx = 0;
  69. response_idx = -1;
  70. train_sample_count = -1;
  71. delimiter = ',';
  72. miss_ch = '?';
  73. //flt_separator = '.';
  74. rng = &cv::theRNG();
  75. }
  76. CvMLData::~CvMLData()
  77. {
  78. clear();
  79. }
  80. void CvMLData::free_train_test_idx()
  81. {
  82. cvReleaseMat( &train_sample_idx );
  83. cvReleaseMat( &test_sample_idx );
  84. sample_idx = 0;
  85. }
  86. void CvMLData::clear()
  87. {
  88. class_map.clear();
  89. cvReleaseMat( &values );
  90. cvReleaseMat( &missing );
  91. cvReleaseMat( &var_types );
  92. cvReleaseMat( &var_idx_mask );
  93. cvReleaseMat( &response_out );
  94. cvReleaseMat( &var_idx_out );
  95. cvReleaseMat( &var_types_out );
  96. free_train_test_idx();
  97. total_class_count = 0;
  98. response_idx = -1;
  99. train_sample_count = -1;
  100. }
  101. void CvMLData::set_header_lines_number( int idx )
  102. {
  103. header_lines_number = std::max(0, idx);
  104. }
  105. int CvMLData::get_header_lines_number() const
  106. {
  107. return header_lines_number;
  108. }
  // Reads one line with fgets() and strips every trailing '\r' and '\n'.
  //   str    - destination buffer
  //   n      - capacity of the buffer, passed straight to fgets()
  //   stream - input stream
  // Returns str on success, or NULL when fgets() returns NULL.
  static char *fgets_chomp(char *str, int n, FILE *stream)
  {
      char *head = fgets(str, n, stream);
      if( head )
      {
          // Trim by length rather than with a tail pointer: for an empty string
          // the old "head + strlen(head) - 1" pointed before the buffer.
          size_t len = strlen(head);
          while( len > 0 && (head[len - 1] == '\r' || head[len - 1] == '\n') )
              head[--len] = '\0';
      }
      return head;
  }
  123. int CvMLData::read_csv(const char* filename)
  124. {
  125. const int M = 1000000;
  126. const char str_delimiter[3] = { ' ', delimiter, '\0' };
  127. FILE* file = 0;
  128. CvMemStorage* storage;
  129. CvSeq* seq;
  130. char *ptr;
  131. float* el_ptr;
  132. CvSeqReader reader;
  133. int cols_count = 0;
  134. uchar *var_types_ptr = 0;
  135. clear();
  136. file = fopen( filename, "rt" );
  137. if( !file )
  138. return -1;
  139. std::vector<char> _buf(M);
  140. char* buf = &_buf[0];
  141. // skip header lines
  142. for( int i = 0; i < header_lines_number; i++ )
  143. {
  144. if( fgets( buf, M, file ) == 0 )
  145. {
  146. fclose(file);
  147. return -1;
  148. }
  149. }
  150. // read the first data line and determine the number of variables
  151. if( !fgets_chomp( buf, M, file ))
  152. {
  153. fclose(file);
  154. return -1;
  155. }
  156. ptr = buf;
  157. while( *ptr == ' ' )
  158. ptr++;
  159. for( ; *ptr != '\0'; )
  160. {
  161. if(*ptr == delimiter || *ptr == ' ')
  162. {
  163. cols_count++;
  164. ptr++;
  165. while( *ptr == ' ' ) ptr++;
  166. }
  167. else
  168. ptr++;
  169. }
  170. cols_count++;
  171. if ( cols_count == 0)
  172. {
  173. fclose(file);
  174. return -1;
  175. }
  176. // create temporary memory storage to store the whole database
  177. el_ptr = new float[cols_count];
  178. storage = cvCreateMemStorage();
  179. seq = cvCreateSeq( 0, sizeof(*seq), cols_count*sizeof(float), storage );
  180. var_types = cvCreateMat( 1, cols_count, CV_8U );
  181. cvZero( var_types );
  182. var_types_ptr = var_types->data.ptr;
  183. for(;;)
  184. {
  185. char *token = NULL;
  186. int type;
  187. token = strtok(buf, str_delimiter);
  188. if (!token)
  189. break;
  190. for (int i = 0; i < cols_count-1; i++)
  191. {
  192. str_to_flt_elem( token, el_ptr[i], type);
  193. var_types_ptr[i] |= type;
  194. token = strtok(NULL, str_delimiter);
  195. if (!token)
  196. {
  197. fclose(file);
  198. delete [] el_ptr;
  199. return -1;
  200. }
  201. }
  202. str_to_flt_elem( token, el_ptr[cols_count-1], type);
  203. var_types_ptr[cols_count-1] |= type;
  204. cvSeqPush( seq, el_ptr );
  205. if( !fgets_chomp( buf, M, file ) )
  206. break;
  207. }
  208. fclose(file);
  209. values = cvCreateMat( seq->total, cols_count, CV_32FC1 );
  210. missing = cvCreateMat( seq->total, cols_count, CV_8U );
  211. var_idx_mask = cvCreateMat( 1, values->cols, CV_8UC1 );
  212. cvSet( var_idx_mask, cvRealScalar(1) );
  213. train_sample_count = seq->total;
  214. cvStartReadSeq( seq, &reader );
  215. for(int i = 0; i < seq->total; i++ )
  216. {
  217. const float* sdata = (float*)reader.ptr;
  218. float* ddata = values->data.fl + cols_count*i;
  219. uchar* dm = missing->data.ptr + cols_count*i;
  220. for( int j = 0; j < cols_count; j++ )
  221. {
  222. ddata[j] = sdata[j];
  223. dm[j] = ( fabs( MISS_VAL - sdata[j] ) <= FLT_EPSILON );
  224. }
  225. CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
  226. }
  227. if ( cvNorm( missing, 0, CV_L1 ) <= FLT_EPSILON )
  228. cvReleaseMat( &missing );
  229. cvReleaseMemStorage( &storage );
  230. delete []el_ptr;
  231. return 0;
  232. }
  233. const CvMat* CvMLData::get_values() const
  234. {
  235. return values;
  236. }
  237. const CvMat* CvMLData::get_missing() const
  238. {
  239. CV_FUNCNAME( "CvMLData::get_missing" );
  240. __BEGIN__;
  241. if ( !values )
  242. CV_ERROR( CV_StsInternal, "data is empty" );
  243. __END__;
  244. return missing;
  245. }
  246. const std::map<cv::String, int>& CvMLData::get_class_labels_map() const
  247. {
  248. return class_map;
  249. }
  250. void CvMLData::str_to_flt_elem( const char* token, float& flt_elem, int& type)
  251. {
  252. char* stopstring = NULL;
  253. flt_elem = (float)strtod( token, &stopstring );
  254. assert( stopstring );
  255. type = CV_VAR_ORDERED;
  256. if ( *stopstring == miss_ch && strlen(stopstring) == 1 ) // missed value
  257. {
  258. flt_elem = MISS_VAL;
  259. type = CV_VAR_MISS;
  260. }
  261. else
  262. {
  263. if ( (*stopstring != 0) && (*stopstring != '\n') && (strcmp(stopstring, "\r\n") != 0) ) // class label
  264. {
  265. int idx = class_map[token];
  266. if ( idx == 0)
  267. {
  268. total_class_count++;
  269. idx = total_class_count;
  270. class_map[token] = idx;
  271. }
  272. flt_elem = (float)idx;
  273. type = CV_VAR_CATEGORICAL;
  274. }
  275. }
  276. }
  277. void CvMLData::set_delimiter(char ch)
  278. {
  279. CV_FUNCNAME( "CvMLData::set_delimited" );
  280. __BEGIN__;
  281. if (ch == miss_ch /*|| ch == flt_separator*/)
  282. CV_ERROR(CV_StsBadArg, "delimited, miss_character and flt_separator must be different");
  283. delimiter = ch;
  284. __END__;
  285. }
  286. char CvMLData::get_delimiter() const
  287. {
  288. return delimiter;
  289. }
  290. void CvMLData::set_miss_ch(char ch)
  291. {
  292. CV_FUNCNAME( "CvMLData::set_miss_ch" );
  293. __BEGIN__;
  294. if (ch == delimiter/* || ch == flt_separator*/)
  295. CV_ERROR(CV_StsBadArg, "delimited, miss_character and flt_separator must be different");
  296. miss_ch = ch;
  297. __END__;
  298. }
  299. char CvMLData::get_miss_ch() const
  300. {
  301. return miss_ch;
  302. }
  303. void CvMLData::set_response_idx( int idx )
  304. {
  305. CV_FUNCNAME( "CvMLData::set_response_idx" );
  306. __BEGIN__;
  307. if ( !values )
  308. CV_ERROR( CV_StsInternal, "data is empty" );
  309. if ( idx >= values->cols)
  310. CV_ERROR( CV_StsBadArg, "idx value is not correct" );
  311. if ( response_idx >= 0 )
  312. chahge_var_idx( response_idx, true );
  313. if ( idx >= 0 )
  314. chahge_var_idx( idx, false );
  315. response_idx = idx;
  316. __END__;
  317. }
  318. int CvMLData::get_response_idx() const
  319. {
  320. CV_FUNCNAME( "CvMLData::get_response_idx" );
  321. __BEGIN__;
  322. if ( !values )
  323. CV_ERROR( CV_StsInternal, "data is empty" );
  324. __END__;
  325. return response_idx;
  326. }
  327. void CvMLData::change_var_type( int var_idx, int type )
  328. {
  329. CV_FUNCNAME( "CvMLData::change_var_type" );
  330. __BEGIN__;
  331. int var_count = 0;
  332. if ( !values )
  333. CV_ERROR( CV_StsInternal, "data is empty" );
  334. var_count = values->cols;
  335. if ( var_idx < 0 || var_idx >= var_count)
  336. CV_ERROR( CV_StsBadArg, "var_idx is not correct" );
  337. if ( type != CV_VAR_ORDERED && type != CV_VAR_CATEGORICAL)
  338. CV_ERROR( CV_StsBadArg, "type is not correct" );
  339. assert( var_types );
  340. if ( var_types->data.ptr[var_idx] == CV_VAR_CATEGORICAL && type == CV_VAR_ORDERED)
  341. CV_ERROR( CV_StsBadArg, "it`s impossible to assign CV_VAR_ORDERED type to categorical variable" );
  342. var_types->data.ptr[var_idx] = (uchar)type;
  343. __END__;
  344. return;
  345. }
  346. void CvMLData::set_var_types( const char* str )
  347. {
  348. CV_FUNCNAME( "CvMLData::set_var_types" );
  349. __BEGIN__;
  350. const char* ord = 0, *cat = 0;
  351. int var_count = 0, set_var_type_count = 0;
  352. if ( !values )
  353. CV_ERROR( CV_StsInternal, "data is empty" );
  354. var_count = values->cols;
  355. assert( var_types );
  356. ord = strstr( str, "ord" );
  357. cat = strstr( str, "cat" );
  358. if ( !ord && !cat )
  359. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  360. if ( !ord && strlen(cat) == 3 ) // str == "cat"
  361. {
  362. cvSet( var_types, cvScalarAll(CV_VAR_CATEGORICAL) );
  363. return;
  364. }
  365. if ( !cat && strlen(ord) == 3 ) // str == "ord"
  366. {
  367. cvSet( var_types, cvScalarAll(CV_VAR_ORDERED) );
  368. return;
  369. }
  370. if ( ord ) // parse ord str
  371. {
  372. char* stopstring = NULL;
  373. if ( ord[3] != '[')
  374. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  375. ord += 4; // pass "ord["
  376. do
  377. {
  378. int b1 = (int)strtod( ord, &stopstring );
  379. if ( *stopstring == 0 || (*stopstring != ',' && *stopstring != ']' && *stopstring != '-') )
  380. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  381. ord = stopstring + 1;
  382. if ( (stopstring[0] == ',') || (stopstring[0] == ']'))
  383. {
  384. if ( var_types->data.ptr[b1] == CV_VAR_CATEGORICAL)
  385. CV_ERROR( CV_StsBadArg, "it`s impossible to assign CV_VAR_ORDERED type to categorical variable" );
  386. var_types->data.ptr[b1] = CV_VAR_ORDERED;
  387. set_var_type_count++;
  388. }
  389. else
  390. {
  391. if ( stopstring[0] == '-')
  392. {
  393. int b2 = (int)strtod( ord, &stopstring);
  394. if ( (*stopstring == 0) || (*stopstring != ',' && *stopstring != ']') )
  395. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  396. ord = stopstring + 1;
  397. for (int i = b1; i <= b2; i++)
  398. {
  399. if ( var_types->data.ptr[i] == CV_VAR_CATEGORICAL)
  400. CV_ERROR( CV_StsBadArg, "it`s impossible to assign CV_VAR_ORDERED type to categorical variable" );
  401. var_types->data.ptr[i] = CV_VAR_ORDERED;
  402. }
  403. set_var_type_count += b2 - b1 + 1;
  404. }
  405. else
  406. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  407. }
  408. }
  409. while (*stopstring != ']');
  410. if ( stopstring[1] != '\0' && stopstring[1] != ',')
  411. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  412. }
  413. if ( cat ) // parse cat str
  414. {
  415. char* stopstring = NULL;
  416. if ( cat[3] != '[')
  417. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  418. cat += 4; // pass "cat["
  419. do
  420. {
  421. int b1 = (int)strtod( cat, &stopstring );
  422. if ( *stopstring == 0 || (*stopstring != ',' && *stopstring != ']' && *stopstring != '-') )
  423. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  424. cat = stopstring + 1;
  425. if ( (stopstring[0] == ',') || (stopstring[0] == ']'))
  426. {
  427. var_types->data.ptr[b1] = CV_VAR_CATEGORICAL;
  428. set_var_type_count++;
  429. }
  430. else
  431. {
  432. if ( stopstring[0] == '-')
  433. {
  434. int b2 = (int)strtod( cat, &stopstring);
  435. if ( (*stopstring == 0) || (*stopstring != ',' && *stopstring != ']') )
  436. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  437. cat = stopstring + 1;
  438. for (int i = b1; i <= b2; i++)
  439. var_types->data.ptr[i] = CV_VAR_CATEGORICAL;
  440. set_var_type_count += b2 - b1 + 1;
  441. }
  442. else
  443. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  444. }
  445. }
  446. while (*stopstring != ']');
  447. if ( stopstring[1] != '\0' && stopstring[1] != ',')
  448. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  449. }
  450. if (set_var_type_count != var_count)
  451. CV_ERROR( CV_StsBadArg, "types string is not correct" );
  452. __END__;
  453. }
  454. const CvMat* CvMLData::get_var_types()
  455. {
  456. CV_FUNCNAME( "CvMLData::get_var_types" );
  457. __BEGIN__;
  458. uchar *var_types_out_ptr = 0;
  459. int avcount, vt_size;
  460. if ( !values )
  461. CV_ERROR( CV_StsInternal, "data is empty" );
  462. assert( var_idx_mask );
  463. avcount = cvFloor( cvNorm( var_idx_mask, 0, CV_L1 ) );
  464. vt_size = avcount + (response_idx >= 0);
  465. if ( avcount == values->cols || (avcount == values->cols-1 && response_idx == values->cols-1) )
  466. return var_types;
  467. if ( !var_types_out || ( var_types_out && var_types_out->cols != vt_size ) )
  468. {
  469. cvReleaseMat( &var_types_out );
  470. var_types_out = cvCreateMat( 1, vt_size, CV_8UC1 );
  471. }
  472. var_types_out_ptr = var_types_out->data.ptr;
  473. for( int i = 0; i < var_types->cols; i++)
  474. {
  475. if (i == response_idx || !var_idx_mask->data.ptr[i]) continue;
  476. *var_types_out_ptr = var_types->data.ptr[i];
  477. var_types_out_ptr++;
  478. }
  479. if ( response_idx >= 0 )
  480. *var_types_out_ptr = var_types->data.ptr[response_idx];
  481. __END__;
  482. return var_types_out;
  483. }
  484. int CvMLData::get_var_type( int var_idx ) const
  485. {
  486. return var_types->data.ptr[var_idx];
  487. }
  488. const CvMat* CvMLData::get_responses()
  489. {
  490. CV_FUNCNAME( "CvMLData::get_responses_ptr" );
  491. __BEGIN__;
  492. int var_count = 0;
  493. if ( !values )
  494. CV_ERROR( CV_StsInternal, "data is empty" );
  495. var_count = values->cols;
  496. if ( response_idx < 0 || response_idx >= var_count )
  497. return 0;
  498. if ( !response_out )
  499. response_out = cvCreateMatHeader( values->rows, 1, CV_32FC1 );
  500. else
  501. cvInitMatHeader( response_out, values->rows, 1, CV_32FC1);
  502. cvGetCol( values, response_out, response_idx );
  503. __END__;
  504. return response_out;
  505. }
  506. void CvMLData::set_train_test_split( const CvTrainTestSplit * spl)
  507. {
  508. CV_FUNCNAME( "CvMLData::set_division" );
  509. __BEGIN__;
  510. int sample_count = 0;
  511. if ( !values )
  512. CV_ERROR( CV_StsInternal, "data is empty" );
  513. sample_count = values->rows;
  514. float train_sample_portion;
  515. if (spl->train_sample_part_mode == CV_COUNT)
  516. {
  517. train_sample_count = spl->train_sample_part.count;
  518. if (train_sample_count > sample_count)
  519. CV_ERROR( CV_StsBadArg, "train samples count is not correct" );
  520. train_sample_count = train_sample_count<=0 ? sample_count : train_sample_count;
  521. }
  522. else // dtype.train_sample_part_mode == CV_PORTION
  523. {
  524. train_sample_portion = spl->train_sample_part.portion;
  525. if ( train_sample_portion > 1)
  526. CV_ERROR( CV_StsBadArg, "train samples count is not correct" );
  527. train_sample_portion = train_sample_portion <= FLT_EPSILON ||
  528. 1 - train_sample_portion <= FLT_EPSILON ? 1 : train_sample_portion;
  529. train_sample_count = std::max(1, cvFloor( train_sample_portion * sample_count ));
  530. }
  531. if ( train_sample_count == sample_count )
  532. {
  533. free_train_test_idx();
  534. return;
  535. }
  536. if ( train_sample_idx && train_sample_idx->cols != train_sample_count )
  537. free_train_test_idx();
  538. if ( !sample_idx)
  539. {
  540. int test_sample_count = sample_count- train_sample_count;
  541. sample_idx = (int*)cvAlloc( sample_count * sizeof(sample_idx[0]) );
  542. for (int i = 0; i < sample_count; i++ )
  543. sample_idx[i] = i;
  544. train_sample_idx = cvCreateMatHeader( 1, train_sample_count, CV_32SC1 );
  545. *train_sample_idx = cvMat( 1, train_sample_count, CV_32SC1, &sample_idx[0] );
  546. CV_Assert(test_sample_count > 0);
  547. test_sample_idx = cvCreateMatHeader( 1, test_sample_count, CV_32SC1 );
  548. *test_sample_idx = cvMat( 1, test_sample_count, CV_32SC1, &sample_idx[train_sample_count] );
  549. }
  550. mix = spl->mix;
  551. if ( mix )
  552. mix_train_and_test_idx();
  553. __END__;
  554. }
  555. const CvMat* CvMLData::get_train_sample_idx() const
  556. {
  557. CV_FUNCNAME( "CvMLData::get_train_sample_idx" );
  558. __BEGIN__;
  559. if ( !values )
  560. CV_ERROR( CV_StsInternal, "data is empty" );
  561. __END__;
  562. return train_sample_idx;
  563. }
  564. const CvMat* CvMLData::get_test_sample_idx() const
  565. {
  566. CV_FUNCNAME( "CvMLData::get_test_sample_idx" );
  567. __BEGIN__;
  568. if ( !values )
  569. CV_ERROR( CV_StsInternal, "data is empty" );
  570. __END__;
  571. return test_sample_idx;
  572. }
  573. void CvMLData::mix_train_and_test_idx()
  574. {
  575. CV_FUNCNAME( "CvMLData::mix_train_and_test_idx" );
  576. __BEGIN__;
  577. if ( !values )
  578. CV_ERROR( CV_StsInternal, "data is empty" );
  579. __END__;
  580. if ( !sample_idx)
  581. return;
  582. if ( train_sample_count > 0 && train_sample_count < values->rows )
  583. {
  584. int n = values->rows;
  585. for (int i = 0; i < n; i++)
  586. {
  587. int a = (*rng)(n);
  588. int b = (*rng)(n);
  589. int t;
  590. CV_SWAP( sample_idx[a], sample_idx[b], t );
  591. }
  592. }
  593. }
  594. const CvMat* CvMLData::get_var_idx()
  595. {
  596. CV_FUNCNAME( "CvMLData::get_var_idx" );
  597. __BEGIN__;
  598. int avcount = 0;
  599. if ( !values )
  600. CV_ERROR( CV_StsInternal, "data is empty" );
  601. assert( var_idx_mask );
  602. avcount = cvFloor( cvNorm( var_idx_mask, 0, CV_L1 ) );
  603. int* vidx;
  604. if ( avcount == values->cols )
  605. return 0;
  606. if ( !var_idx_out || ( var_idx_out && var_idx_out->cols != avcount ) )
  607. {
  608. cvReleaseMat( &var_idx_out );
  609. var_idx_out = cvCreateMat( 1, avcount, CV_32SC1);
  610. if ( response_idx >=0 )
  611. var_idx_mask->data.ptr[response_idx] = 0;
  612. }
  613. vidx = var_idx_out->data.i;
  614. for(int i = 0; i < var_idx_mask->cols; i++)
  615. if ( var_idx_mask->data.ptr[i] )
  616. {
  617. *vidx = i;
  618. vidx++;
  619. }
  620. __END__;
  621. return var_idx_out;
  622. }
  623. void CvMLData::chahge_var_idx( int vi, bool state )
  624. {
  625. change_var_idx( vi, state );
  626. }
  627. void CvMLData::change_var_idx( int vi, bool state )
  628. {
  629. CV_FUNCNAME( "CvMLData::change_var_idx" );
  630. __BEGIN__;
  631. int var_count = 0;
  632. if ( !values )
  633. CV_ERROR( CV_StsInternal, "data is empty" );
  634. var_count = values->cols;
  635. if ( vi < 0 || vi >= var_count)
  636. CV_ERROR( CV_StsBadArg, "variable index is not correct" );
  637. assert( var_idx_mask );
  638. var_idx_mask->data.ptr[vi] = state;
  639. __END__;
  640. }
  641. /* End of file. */