Changeset 7812:adc9f31166b1 in orange
 Timestamp:
 04/06/11 11:22:42 (3 years ago)
 Branch:
 default
 Convert:
 dbd3141ee24cfeacb10eecb25c941bcceae168fb
 Location:
 source/orange
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

source/orange/linear.cpp
r6963 r7812 1 1 /* 2 Copyright (c) 200720 08The LIBLINEAR Project.2 Copyright (c) 20072010 The LIBLINEAR Project. 3 3 All rights reserved. 4 4 … … 37 37 Changes: 38 38 #include "linear.h" > #include "linear.ppp" 39  removed #include "tron.h", instead the contents of tron.h are in linear.h 39 40 #ifndef block around swap, min and max definition 40 41 */ … … 61 62 #endif 62 63 #endif 64 template <class S, class T> static inline void clone(T*& dst, S* src, int n) 65 { 66 dst = new T[n]; 67 memcpy((void *)dst,(void *)src,sizeof(T)*n); 68 } 63 69 64 70 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 65 71 #define INF HUGE_VAL 66 72 67 #if 0 68 void info(const char *fmt,...) 69 { 73 74 static void print_string_stdout(const char *s) 75 { 76 fputs(s,stdout); 77 fflush(stdout); 78 } 79 80 static void (*liblinear_print_string) (const char *) = &print_string_stdout; 81 82 #if 1 83 static void info(const char *fmt,...) 84 { 85 char buf[BUFSIZ]; 70 86 va_list ap; 71 87 va_start(ap,fmt); 72 v printf(fmt,ap);88 vsprintf(buf,fmt,ap); 73 89 va_end(ap); 74 } 75 /*void info_flush() 76 { 77 fflush(stdout); 78 }*/ 79 extern void info_flush(); 90 (*liblinear_print_string)(buf); 91 } 80 92 #else 81 void info(char *fmt,...) {} 82 //extern void info(char *fmt,...); 83 void info_flush() {} 84 //extern void info_flush(); 93 static void info(const char *fmt,...) {} 85 94 #endif 86 95 87 class l2 _lr_fun : public function196 class l2r_lr_fun : public function 88 97 { 89 98 public: 90 l2 _lr_fun(const problem *prob, double Cp, double Cn);91 ~l2 _lr_fun();92 99 l2r_lr_fun(const problem *prob, double Cp, double Cn); 100 ~l2r_lr_fun(); 101 93 102 double fun(double *w); 94 103 void grad(double *w, double *g); 95 104 void Hv(double *s, double *Hs); 96 105 97 106 int get_nr_variable(void); 98 107 99 108 private: 100 109 void Xv(double *v, double *Xv); 101 110 void XTv(double *v, double *XTv); 102 111 103 112 double *C; 104 113 double *z; … … 107 116 }; 108 117 109 l2 _lr_fun::l2_lr_fun(const problem *prob, double Cp, double Cn)118 l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn) 110 119 { 111 120 int i; 112 121 int l=prob>l; 113 122 int *y=prob>y; 114 123 115 124 this>prob = prob; 116 125 117 126 z = new double[l]; 118 127 D = new double[l]; 119 128 C = new double[l]; 120 129 121 130 for (i=0; i<l; i++) 122 131 { … … 128 137 } 129 138 130 l2 _lr_fun::~l2_lr_fun()139 l2r_lr_fun::~l2r_lr_fun() 131 140 { 132 141 delete[] z; … … 136 145 137 146 138 double l2 _lr_fun::fun(double *w)147 double l2r_lr_fun::fun(double *w) 139 148 { 140 149 int i; … … 142 151 int *y=prob>y; 143 152 int l=prob>l; 144 int n=prob>n;145 153 int w_size=get_nr_variable(); 154 146 155 Xv(w, z); 147 156 for(i=0;i<l;i++) 148 157 { 149 158 double yz = y[i]*z[i]; 150 159 if (yz >= 0) 151 160 f += C[i]*log(1 + exp(yz)); 152 161 else 153 162 f += C[i]*(yz+log(1 + exp(yz))); 154 163 } 155 164 f = 2*f; 156 for(i=0;i< n;i++)165 for(i=0;i<w_size;i++) 157 166 f += w[i]*w[i]; 158 167 f /= 2.0; 159 168 160 169 return(f); 161 170 } 162 171 163 void l2 _lr_fun::grad(double *w, double *g)172 void l2r_lr_fun::grad(double *w, double *g) 164 173 { 165 174 int i; 166 175 int *y=prob>y; 167 176 int l=prob>l; 168 int n=prob>n;169 177 int w_size=get_nr_variable(); 178 170 179 for(i=0;i<l;i++) 171 180 { … … 175 184 } 176 185 XTv(z, g); 177 178 for(i=0;i< n;i++)186 187 for(i=0;i<w_size;i++) 179 188 g[i] = w[i] + g[i]; 180 189 } 181 190 182 int l2 _lr_fun::get_nr_variable(void)191 int l2r_lr_fun::get_nr_variable(void) 183 192 { 184 193 return prob>n; 185 194 } 186 195 187 void l2 _lr_fun::Hv(double *s, double *Hs)196 void l2r_lr_fun::Hv(double *s, double *Hs) 188 197 { 189 198 int i; 190 199 int l=prob>l; 191 int n=prob>n;200 int w_size=get_nr_variable(); 192 201 double *wa = new double[l]; 193 202 194 203 Xv(s, wa); 195 204 for(i=0;i<l;i++) 196 205 wa[i] = C[i]*D[i]*wa[i]; 197 206 198 207 XTv(wa, Hs); 199 for(i=0;i< n;i++)208 for(i=0;i<w_size;i++) 200 209 Hs[i] = s[i] + Hs[i]; 201 210 delete[] wa; 202 211 } 203 212 204 void l2 _lr_fun::Xv(double *v, double *Xv)213 void l2r_lr_fun::Xv(double *v, double *Xv) 205 214 { 206 215 int i; 207 216 int l=prob>l; 208 217 feature_node **x=prob>x; 209 218 210 219 for(i=0;i<l;i++) 211 220 { … … 220 229 } 221 230 222 void l2 _lr_fun::XTv(double *v, double *XTv)231 void l2r_lr_fun::XTv(double *v, double *XTv) 223 232 { 224 233 int i; 225 234 int l=prob>l; 226 int n=prob>n;235 int w_size=get_nr_variable(); 227 236 feature_node **x=prob>x; 228 229 for(i=0;i< n;i++)237 238 for(i=0;i<w_size;i++) 230 239 XTv[i]=0; 231 240 for(i=0;i<l;i++) … … 240 249 } 241 250 242 class l2 loss_svm_fun : public function1251 class l2r_l2_svc_fun : public function 243 252 { 244 253 public: 245 l2 loss_svm_fun(const problem *prob, double Cp, double Cn);246 ~l2 loss_svm_fun();247 254 l2r_l2_svc_fun(const problem *prob, double Cp, double Cn); 255 ~l2r_l2_svc_fun(); 256 248 257 double fun(double *w); 249 258 void grad(double *w, double *g); 250 259 void Hv(double *s, double *Hs); 251 260 252 261 int get_nr_variable(void); 253 262 254 263 private: 255 264 void Xv(double *v, double *Xv); 256 265 void subXv(double *v, double *Xv); 257 266 void subXTv(double *v, double *XTv); 258 267 259 268 double *C; 260 269 double *z; … … 265 274 }; 266 275 267 l2 loss_svm_fun::l2loss_svm_fun(const problem *prob, double Cp, double Cn)276 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn) 268 277 { 269 278 int i; 270 279 int l=prob>l; 271 280 int *y=prob>y; 272 281 273 282 this>prob = prob; 274 283 275 284 z = new double[l]; 276 285 D = new double[l]; 277 286 C = new double[l]; 278 287 I = new int[l]; 279 288 280 289 for (i=0; i<l; i++) 281 290 { … … 287 296 } 288 297 289 l2 loss_svm_fun::~l2loss_svm_fun()298 l2r_l2_svc_fun::~l2r_l2_svc_fun() 290 299 { 291 300 delete[] z; … … 295 304 } 296 305 297 double l2 loss_svm_fun::fun(double *w)306 double l2r_l2_svc_fun::fun(double *w) 298 307 { 299 308 int i; … … 301 310 int *y=prob>y; 302 311 int l=prob>l; 303 int n=prob>n;304 312 int w_size=get_nr_variable(); 313 305 314 Xv(w, z); 306 315 for(i=0;i<l;i++) 307 316 { 308 309 double d = z[i]1;310 if (d <0)317 z[i] = y[i]*z[i]; 318 double d = 1z[i]; 319 if (d > 0) 311 320 f += C[i]*d*d; 312 321 } 313 322 f = 2*f; 314 for(i=0;i< n;i++)323 for(i=0;i<w_size;i++) 315 324 f += w[i]*w[i]; 316 325 f /= 2.0; 317 326 318 327 return(f); 319 328 } 320 329 321 void l2 loss_svm_fun::grad(double *w, double *g)330 void l2r_l2_svc_fun::grad(double *w, double *g) 322 331 { 323 332 int i; 324 333 int *y=prob>y; 325 334 int l=prob>l; 326 int n=prob>n;327 335 int w_size=get_nr_variable(); 336 328 337 sizeI = 0; 329 338 for (i=0;i<l;i++) … … 335 344 } 336 345 subXTv(z, g); 337 338 for(i=0;i< n;i++)346 347 for(i=0;i<w_size;i++) 339 348 g[i] = w[i] + 2*g[i]; 340 349 } 341 350 342 int l2 loss_svm_fun::get_nr_variable(void)351 int l2r_l2_svc_fun::get_nr_variable(void) 343 352 { 344 353 return prob>n; 345 354 } 346 355 347 void l2 loss_svm_fun::Hv(double *s, double *Hs)356 void l2r_l2_svc_fun::Hv(double *s, double *Hs) 348 357 { 349 358 int i; 350 359 int l=prob>l; 351 int n=prob>n;360 int w_size=get_nr_variable(); 352 361 double *wa = new double[l]; 353 362 354 363 subXv(s, wa); 355 364 for(i=0;i<sizeI;i++) 356 365 wa[i] = C[I[i]]*wa[i]; 357 366 358 367 subXTv(wa, Hs); 359 for(i=0;i< n;i++)368 for(i=0;i<w_size;i++) 360 369 Hs[i] = s[i] + 2*Hs[i]; 361 370 delete[] wa; 362 371 } 363 372 364 void l2 loss_svm_fun::Xv(double *v, double *Xv)373 void l2r_l2_svc_fun::Xv(double *v, double *Xv) 365 374 { 366 375 int i; 367 376 int l=prob>l; 368 377 feature_node **x=prob>x; 369 378 370 379 for(i=0;i<l;i++) 371 380 { … … 380 389 } 381 390 382 void l2 loss_svm_fun::subXv(double *v, double *Xv)391 void l2r_l2_svc_fun::subXv(double *v, double *Xv) 383 392 { 384 393 int i; 385 394 feature_node **x=prob>x; 386 395 387 396 for(i=0;i<sizeI;i++) 388 397 { … … 397 406 } 398 407 399 void l2 loss_svm_fun::subXTv(double *v, double *XTv)408 void l2r_l2_svc_fun::subXTv(double *v, double *XTv) 400 409 { 401 410 int i; 402 int n=prob>n;411 int w_size=get_nr_variable(); 403 412 feature_node **x=prob>x; 404 405 for(i=0;i< n;i++)413 414 for(i=0;i<w_size;i++) 406 415 XTv[i]=0; 407 416 for(i=0;i<sizeI;i++) … … 416 425 } 417 426 418 // A coordinate descent algorithm for 427 // A coordinate descent algorithm for 428 // multiclass support vector machines by Crammer and Singer 429 // 430 // min_{\alpha} 0.5 \sum_m w_m(\alpha)^2 + \sum_i \sum_m e^m_i alpha^m_i 431 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i 432 // 433 // where e^m_i = 0 if y_i = m, 434 // e^m_i = 1 if y_i != m, 435 // C^m_i = C if m = y_i, 436 // C^m_i = 0 if m != y_i, 437 // and w_m(\alpha) = \sum_i \alpha^m_i x_i 438 // 439 // Given: 440 // x, y, C 441 // eps is the stopping tolerance 442 // 443 // solution will be put in w 444 // 445 // See Appendix of LIBLINEAR paper, Fan et al. (2008) 446 447 #define GETI(i) (prob>y[i]) 448 // To support weights for instances, use GETI(i) (i) 449 450 class Solver_MCSVM_CS 451 { 452 public: 453 Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000); 454 ~Solver_MCSVM_CS(); 455 void Solve(double *w); 456 private: 457 void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new); 458 bool be_shrunk(int i, int m, int yi, double alpha_i, double minG); 459 double *B, *C, *G; 460 int w_size, l; 461 int nr_class; 462 int max_iter; 463 double eps; 464 const problem *prob; 465 }; 466 467 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter) 468 { 469 this>w_size = prob>n; 470 this>l = prob>l; 471 this>nr_class = nr_class; 472 this>eps = eps; 473 this>max_iter = max_iter; 474 this>prob = prob; 475 this>B = new double[nr_class]; 476 this>G = new double[nr_class]; 477 this>C = weighted_C; 478 } 479 480 Solver_MCSVM_CS::~Solver_MCSVM_CS() 481 { 482 delete[] B; 483 delete[] G; 484 } 485 486 int compare_double(const void *a, const void *b) 487 { 488 if(*(double *)a > *(double *)b) 489 return 1; 490 if(*(double *)a < *(double *)b) 491 return 1; 492 return 0; 493 } 494 495 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new) 496 { 497 int r; 498 double *D; 499 500 clone(D, B, active_i); 501 if(yi < active_i) 502 D[yi] += A_i*C_yi; 503 qsort(D, active_i, sizeof(double), compare_double); 504 505 double beta = D[0]  A_i*C_yi; 506 for(r=1;r<active_i && beta<r*D[r];r++) 507 beta += D[r]; 508 509 beta /= r; 510 for(r=0;r<active_i;r++) 511 { 512 if(r == yi) 513 alpha_new[r] = min(C_yi, (betaB[r])/A_i); 514 else 515 alpha_new[r] = min((double)0, (beta  B[r])/A_i); 516 } 517 delete[] D; 518 } 519 520 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG) 521 { 522 double bound = 0; 523 if(m == yi) 524 bound = C[GETI(i)]; 525 if(alpha_i == bound && G[m] < minG) 526 return true; 527 return false; 528 } 529 530 void Solver_MCSVM_CS::Solve(double *w) 531 { 532 int i, m, s; 533 int iter = 0; 534 double *alpha = new double[l*nr_class]; 535 double *alpha_new = new double[nr_class]; 536 int *index = new int[l]; 537 double *QD = new double[l]; 538 int *d_ind = new int[nr_class]; 539 double *d_val = new double[nr_class]; 540 int *alpha_index = new int[nr_class*l]; 541 int *y_index = new int[l]; 542 int active_size = l; 543 int *active_size_i = new int[l]; 544 double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking 545 bool start_from_all = true; 546 // initial 547 for(i=0;i<l*nr_class;i++) 548 alpha[i] = 0; 549 for(i=0;i<w_size*nr_class;i++) 550 w[i] = 0; 551 for(i=0;i<l;i++) 552 { 553 for(m=0;m<nr_class;m++) 554 alpha_index[i*nr_class+m] = m; 555 feature_node *xi = prob>x[i]; 556 QD[i] = 0; 557 while(xi>index != 1) 558 { 559 QD[i] += (xi>value)*(xi>value); 560 xi++; 561 } 562 active_size_i[i] = nr_class; 563 y_index[i] = prob>y[i]; 564 index[i] = i; 565 } 566 567 while(iter < max_iter) 568 { 569 double stopping = INF; 570 for(i=0;i<active_size;i++) 571 { 572 int j = i+rand()%(active_sizei); 573 swap(index[i], index[j]); 574 } 575 for(s=0;s<active_size;s++) 576 { 577 i = index[s]; 578 double Ai = QD[i]; 579 double *alpha_i = &alpha[i*nr_class]; 580 int *alpha_index_i = &alpha_index[i*nr_class]; 581 582 if(Ai > 0) 583 { 584 for(m=0;m<active_size_i[i];m++) 585 G[m] = 1; 586 if(y_index[i] < active_size_i[i]) 587 G[y_index[i]] = 0; 588 589 feature_node *xi = prob>x[i]; 590 while(xi>index!= 1) 591 { 592 double *w_i = &w[(xi>index1)*nr_class]; 593 for(m=0;m<active_size_i[i];m++) 594 G[m] += w_i[alpha_index_i[m]]*(xi>value); 595 xi++; 596 } 597 598 double minG = INF; 599 double maxG = INF; 600 for(m=0;m<active_size_i[i];m++) 601 { 602 if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG) 603 minG = G[m]; 604 if(G[m] > maxG) 605 maxG = G[m]; 606 } 607 if(y_index[i] < active_size_i[i]) 608 if(alpha_i[prob>y[i]] < C[GETI(i)] && G[y_index[i]] < minG) 609 minG = G[y_index[i]]; 610 611 for(m=0;m<active_size_i[i];m++) 612 { 613 if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG)) 614 { 615 active_size_i[i]; 616 while(active_size_i[i]>m) 617 { 618 if(!be_shrunk(i, active_size_i[i], y_index[i], 619 alpha_i[alpha_index_i[active_size_i[i]]], minG)) 620 { 621 swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); 622 swap(G[m], G[active_size_i[i]]); 623 if(y_index[i] == active_size_i[i]) 624 y_index[i] = m; 625 else if(y_index[i] == m) 626 y_index[i] = active_size_i[i]; 627 break; 628 } 629 active_size_i[i]; 630 } 631 } 632 } 633 634 if(active_size_i[i] <= 1) 635 { 636 active_size; 637 swap(index[s], index[active_size]); 638 s; 639 continue; 640 } 641 642 if(maxGminG <= 1e12) 643 continue; 644 else 645 stopping = max(maxG  minG, stopping); 646 647 for(m=0;m<active_size_i[i];m++) 648 B[m] = G[m]  Ai*alpha_i[alpha_index_i[m]] ; 649 650 solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new); 651 int nz_d = 0; 652 for(m=0;m<active_size_i[i];m++) 653 { 654 double d = alpha_new[m]  alpha_i[alpha_index_i[m]]; 655 alpha_i[alpha_index_i[m]] = alpha_new[m]; 656 if(fabs(d) >= 1e12) 657 { 658 d_ind[nz_d] = alpha_index_i[m]; 659 d_val[nz_d] = d; 660 nz_d++; 661 } 662 } 663 664 xi = prob>x[i]; 665 while(xi>index != 1) 666 { 667 double *w_i = &w[(xi>index1)*nr_class]; 668 for(m=0;m<nz_d;m++) 669 w_i[d_ind[m]] += d_val[m]*xi>value; 670 xi++; 671 } 672 } 673 } 674 675 iter++; 676 if(iter % 10 == 0) 677 { 678 info("."); 679 } 680 681 if(stopping < eps_shrink) 682 { 683 if(stopping < eps && start_from_all == true) 684 break; 685 else 686 { 687 active_size = l; 688 for(i=0;i<l;i++) 689 active_size_i[i] = nr_class; 690 info("*"); 691 eps_shrink = max(eps_shrink/2, eps); 692 start_from_all = true; 693 } 694 } 695 else 696 start_from_all = false; 697 } 698 699 info("\noptimization finished, #iter = %d\n",iter); 700 if (iter >= max_iter) 701 info("\nWARNING: reaching max number of iterations\n"); 702 703 // calculate objective value 704 double v = 0; 705 int nSV = 0; 706 for(i=0;i<w_size*nr_class;i++) 707 v += w[i]*w[i]; 708 v = 0.5*v; 709 for(i=0;i<l*nr_class;i++) 710 { 711 v += alpha[i]; 712 if(fabs(alpha[i]) > 0) 713 nSV++; 714 } 715 for(i=0;i<l;i++) 716 v = alpha[i*nr_class+prob>y[i]]; 717 info("Objective value = %lf\n",v); 718 info("nSV = %d\n",nSV); 719 720 delete [] alpha; 721 delete [] alpha_new; 722 delete [] index; 723 delete [] QD; 724 delete [] d_ind; 725 delete [] d_val; 726 delete [] alpha_index; 727 delete [] y_index; 728 delete [] active_size_i; 729 } 730 731 // A coordinate descent algorithm for 419 732 // L1loss and L2loss SVM dual problems 420 733 // 421 734 // min_\alpha 0.5(\alpha^T (Q + D)\alpha)  e^T \alpha, 422 735 // s.t. 0 <= alpha_i <= upper_bound_i, 423 // 736 // 424 737 // where Qij = yi yj xi^T xj and 425 // D is a diagonal matrix 738 // D is a diagonal matrix 426 739 // 427 740 // In L1SVM case: … … 429 742 // upper_bound_i = Cn if y_i = 1 430 743 // D_ii = 0 431 // In L2S vmcase:744 // In L2SVM case: 432 745 // upper_bound_i = INF 433 746 // D_ii = 1/(2*Cp) if y_i = 1 434 747 // D_ii = 1/(2*Cn) if y_i = 1 435 748 // 436 // Given: 749 // Given: 437 750 // x, y, Cp, Cn 438 751 // eps is the stopping tolerance 439 752 // 440 753 // solution will be put in w 441 442 static void solve_linear_c_svc( 443 const problem *prob, double *w, double eps, 444 double Cp, double Cn, int solver_type) 754 // 755 // See Algorithm 3 of Hsieh et al., ICML 2008 756 757 #undef GETI 758 #define GETI(i) (y[i]+1) 759 // To support weights for instances, use GETI(i) (i) 760 761 static void solve_l2r_l1l2_svc( 762 const problem *prob, double *w, double eps, 763 double Cp, double Cn, int solver_type) 445 764 { 446 765 int l = prob>l; 447 int n= prob>n;766 int w_size = prob>n; 448 767 int i, s, iter = 0; 449 768 double C, d, G; 450 769 double *QD = new double[l]; 451 int max_iter = 20000;770 int max_iter = 1000; 452 771 int *index = new int[l]; 453 772 double *alpha = new double[l]; 454 773 schar *y = new schar[l]; 455 774 int active_size = l; 456 775 457 776 // PG: projected gradient, for shrinking and stopping 458 777 double PG; … … 460 779 double PGmin_old = INF; 461 780 double PGmax_new, PGmin_new; 462 463 // default solver_type: L2LOSS_SVM_DUAL 464 double diag_p = 0.5/Cp, diag_n = 0.5/Cn; 465 double upper_bound_p = INF, upper_bound_n = INF; 466 if(solver_type == L1LOSS_SVM_DUAL) 467 { 468 diag_p = 0; diag_n = 0; 469 upper_bound_p = Cp; upper_bound_n = Cn; 470 } 471 472 for(i=0; i<n; i++) 781 782 // default solver_type: L2R_L2LOSS_SVC_DUAL 783 double diag[3] = {0.5/Cn, 0, 0.5/Cp}; 784 double upper_bound[3] = {INF, 0, INF}; 785 if(solver_type == L2R_L1LOSS_SVC_DUAL) 786 { 787 diag[0] = 0; 788 diag[2] = 0; 789 upper_bound[0] = Cn; 790 upper_bound[2] = Cp; 791 } 792 793 for(i=0; i<w_size; i++) 473 794 w[i] = 0; 474 795 for(i=0; i<l; i++) … … 477 798 if(prob>y[i] > 0) 478 799 { 479 y[i] = +1; 480 QD[i] = diag_p; 800 y[i] = +1; 481 801 } 482 802 else 483 803 { 484 804 y[i] = 1; 485 QD[i] = diag_n;486 }487 805 } 806 QD[i] = diag[GETI(i)]; 807 488 808 feature_node *xi = prob>x[i]; 489 809 while (xi>index != 1) … … 494 814 index[i] = i; 495 815 } 496 816 497 817 while (iter < max_iter) 498 818 { 499 819 PGmax_new = INF; 500 820 PGmin_new = INF; 501 821 502 822 for (i=0; i<active_size; i++) 503 823 { … … 505 825 swap(index[i], index[j]); 506 826 } 507 508 for (s=0; s <active_size; s++)827 828 for (s=0; s<active_size; s++) 509 829 { 510 830 i = index[s]; 511 831 G = 0; 512 832 schar yi = y[i]; 513 833 514 834 feature_node *xi = prob>x[i]; 515 835 while(xi>index!= 1) … … 519 839 } 520 840 G = G*yi1; 521 522 if(yi == 1) 523 { 524 C = upper_bound_p; 525 G += alpha[i]*diag_p; 526 } 527 else 528 { 529 C = upper_bound_n; 530 G += alpha[i]*diag_n; 531 } 532 841 842 C = upper_bound[GETI(i)]; 843 G += alpha[i]*diag[GETI(i)]; 844 533 845 PG = 0; 534 if (alpha[i] == 0)846 if (alpha[i] == 0) 535 847 { 536 848 if (G > PGmax_old) … … 558 870 else 559 871 PG = G; 560 872 561 873 PGmax_new = max(PGmax_new, PG); 562 874 PGmin_new = min(PGmin_new, PG); 563 875 564 876 if(fabs(PG) > 1.0e12) 565 877 { … … 575 887 } 576 888 } 577 889 578 890 iter++; 579 891 if(iter % 10 == 0) 580 {581 892 info("."); 582 info_flush(); 583 } 584 893 585 894 if(PGmax_new  PGmin_new <= eps) 586 895 { … … 590 899 { 591 900 active_size = l; 592 info("*"); info_flush();901 info("*"); 593 902 PGmax_old = INF; 594 903 PGmin_old = INF; … … 603 912 PGmin_old = INF; 604 913 } 605 914 606 915 info("\noptimization finished, #iter = %d\n",iter); 607 916 if (iter >= max_iter) 608 info(" Warning: reaching max number of iterations\n");609 917 info("\nWARNING: reaching max number of iterations\nUsing s 2 may be faster (also see FAQ)\n\n"); 918 610 919 // calculate objective value 611 920 612 921 double v = 0; 613 922 int nSV = 0; 614 for(i=0; i< n; i++)923 for(i=0; i<w_size; i++) 615 924 v += w[i]*w[i]; 616 925 for(i=0; i<l; i++) 617 926 { 618 if (y[i] == 1) 619 v += alpha[i]*(alpha[i]*diag_p  2); 620 else 621 v += alpha[i]*(alpha[i]*diag_n  2); 927 v += alpha[i]*(alpha[i]*diag[GETI(i)]  2); 622 928 if(alpha[i] > 0) 623 929 ++nSV; … … 625 931 info("Objective value = %lf\n",v/2); 626 932 info("nSV = %d\n",nSV); 627 933 628 934 delete [] QD; 629 935 delete [] alpha; … … 632 938 } 633 939 940 // A coordinate descent algorithm for 941 // the dual of L2regularized logistic regression problems 942 // 943 // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i  alpha_i) log (upper_bound_i  alpha_i) , 944 // s.t. 0 <= alpha_i <= upper_bound_i, 945 // 946 // where Qij = yi yj xi^T xj and 947 // upper_bound_i = Cp if y_i = 1 948 // upper_bound_i = Cn if y_i = 1 949 // 950 // Given: 951 // x, y, Cp, Cn 952 // eps is the stopping tolerance 953 // 954 // solution will be put in w 955 // 956 // See Algorithm 5 of Yu et al., MLJ 2010 957 958 #undef GETI 959 #define GETI(i) (y[i]+1) 960 // To support weights for instances, use GETI(i) (i) 961 962 void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn) 963 { 964 int l = prob>l; 965 int w_size = prob>n; 966 int i, s, iter = 0; 967 double *xTx = new double[l]; 968 int max_iter = 1000; 969 int *index = new int[l]; 970 double *alpha = new double[2*l]; // store alpha and C  alpha 971 schar *y = new schar[l]; 972 int max_inner_iter = 100; // for inner Newton 973 double innereps = 1e2; 974 double innereps_min = min(1e8, eps); 975 double upper_bound[3] = {Cn, 0, Cp}; 976 977 for(i=0; i<w_size; i++) 978 w[i] = 0; 979 for(i=0; i<l; i++) 980 { 981 if(prob>y[i] > 0) 982 { 983 y[i] = +1; 984 } 985 else 986 { 987 y[i] = 1; 988 } 989 alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e8); 990 alpha[2*i+1] = upper_bound[GETI(i)]  alpha[2*i]; 991 992 xTx[i] = 0; 993 feature_node *xi = prob>x[i]; 994 while (xi>index != 1) 995 { 996 xTx[i] += (xi>value)*(xi>value); 997 w[xi>index1] += y[i]*alpha[2*i]*xi>value; 998 xi++; 999 } 1000 index[i] = i; 1001 } 1002 1003 while (iter < max_iter) 1004 { 1005 for (i=0; i<l; i++) 1006 { 1007 int j = i+rand()%(li); 1008 swap(index[i], index[j]); 1009 } 1010 int newton_iter = 0; 1011 double Gmax = 0; 1012 for (s=0; s<l; s++) 1013 { 1014 i = index[s]; 1015 schar yi = y[i]; 1016 double C = upper_bound[GETI(i)]; 1017 double ywTx = 0, xisq = xTx[i]; 1018 feature_node *xi = prob>x[i]; 1019 while (xi>index != 1) 1020 { 1021 ywTx += w[xi>index1]*xi>value; 1022 xi++; 1023 } 1024 ywTx *= y[i]; 1025 double a = xisq, b = ywTx; 1026 1027 // Decide to minimize g_1(z) or g_2(z) 1028 int ind1 = 2*i, ind2 = 2*i+1, sign = 1; 1029 if(0.5*a*(alpha[ind2]alpha[ind1])+b < 0) 1030 { 1031 ind1 = 2*i+1; 1032 ind2 = 2*i; 1033 sign = 1; 1034 } 1035 1036 // g_t(z) = z*log(z) + (Cz)*log(Cz) + 0.5a(zalpha_old)^2 + sign*b(zalpha_old) 1037 double alpha_old = alpha[ind1]; 1038 double z = alpha_old; 1039 if(C  z < 0.5 * C) 1040 z = 0.1*z; 1041 double gp = a*(zalpha_old)+sign*b+log(z/(Cz)); 1042 Gmax = max(Gmax, fabs(gp)); 1043 1044 // Newton method on the subproblem 1045 const double eta = 0.1; // xi in the paper 1046 int inner_iter = 0; 1047 while (inner_iter <= max_inner_iter) 1048 { 1049 if(fabs(gp) < innereps) 1050 break; 1051 double gpp = a + C/(Cz)/z; 1052 double tmpz = z  gp/gpp; 1053 if(tmpz <= 0) 1054 z *= eta; 1055 else // tmpz in (0, C) 1056 z = tmpz; 1057 gp = a*(zalpha_old)+sign*b+log(z/(Cz)); 1058 newton_iter++; 1059 inner_iter++; 1060 } 1061 1062 if(inner_iter > 0) // update w 1063 { 1064 alpha[ind1] = z; 1065 alpha[ind2] = Cz; 1066 xi = prob>x[i]; 1067 while (xi>index != 1) 1068 { 1069 w[xi>index1] += sign*(zalpha_old)*yi*xi>value; 1070 xi++; 1071 } 1072 } 1073 } 1074 1075 iter++; 1076 if(iter % 10 == 0) 1077 info("."); 1078 1079 if(Gmax < eps) 1080 break; 1081 1082 if(newton_iter < l/10) 1083 innereps = max(innereps_min, 0.1*innereps); 1084 1085 } 1086 1087 info("\noptimization finished, #iter = %d\n",iter); 1088 if (iter >= max_iter) 1089 info("\nWARNING: reaching max number of iterations\nUsing s 0 may be faster (also see FAQ)\n\n"); 1090 1091 // calculate objective value 1092 1093 double v = 0; 1094 for(i=0; i<w_size; i++) 1095 v += w[i] * w[i]; 1096 v *= 0.5; 1097 for(i=0; i<l; i++) 1098 v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1]) 1099  upper_bound[GETI(i)] * log(upper_bound[GETI(i)]); 1100 info("Objective value = %lf\n", v); 1101 1102 delete [] xTx; 1103 delete [] alpha; 1104 delete [] y; 1105 delete [] index; 1106 } 1107 1108 // A coordinate descent algorithm for 1109 // L1regularized L2loss support vector classification 1110 // 1111 // min_w \sum wj + C \sum max(0, 1yi w^T xi)^2, 1112 // 1113 // Given: 1114 // x, y, Cp, Cn 1115 // eps is the stopping tolerance 1116 // 1117 // solution will be put in w 1118 // 1119 // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008) 1120 1121 #undef GETI 1122 #define GETI(i) (y[i]+1) 1123 // To support weights for instances, use GETI(i) (i) 1124 1125 static void solve_l1r_l2_svc( 1126 problem *prob_col, double *w, double eps, 1127 double Cp, double Cn) 1128 { 1129 int l = prob_col>l; 1130 int w_size = prob_col>n; 1131 int j, s, iter = 0; 1132 int max_iter = 1000; 1133 int active_size = w_size; 1134 int max_num_linesearch = 20; 1135 1136 double sigma = 0.01; 1137 double d, G_loss, G, H; 1138 double Gmax_old = INF; 1139 double Gmax_new; 1140 double Gmax_init; 1141 double d_old, d_diff; 1142 double loss_old, loss_new; 1143 double appxcond, cond; 1144 1145 int *index = new int[w_size]; 1146 schar *y = new schar[l]; 1147 double *b = new double[l]; // b = 1ywTx 1148 double *xj_sq = new double[w_size]; 1149 feature_node *x; 1150 1151 double C[3] = {Cn,0,Cp}; 1152 1153 for(j=0; j<l; j++) 1154 { 1155 b[j] = 1; 1156 if(prob_col>y[j] > 0) 1157 y[j] = 1; 1158 else 1159 y[j] = 1; 1160 } 1161 for(j=0; j<w_size; j++) 1162 { 1163 w[j] = 0; 1164 index[j] = j; 1165 xj_sq[j] = 0; 1166 x = prob_col>x[j]; 1167 while(x>index != 1) 1168 { 1169 int ind = x>index1; 1170 double val = x>value; 1171 x>value *= y[ind]; // x>value stores yi*xij 1172 xj_sq[j] += C[GETI(ind)]*val*val; 1173 x++; 1174 } 1175 } 1176 1177 while(iter < max_iter) 1178 { 1179 Gmax_new = 0; 1180 1181 for(j=0; j<active_size; j++) 1182 { 1183 int i = j+rand()%(active_sizej); 1184 swap(index[i], index[j]); 1185 } 1186 1187 for(s=0; s<active_size; s++) 1188 { 1189 j = index[s]; 1190 G_loss = 0; 1191 H = 0; 1192 1193 x = prob_col>x[j]; 1194 while(x>index != 1) 1195 { 1196 int ind = x>index1; 1197 if(b[ind] > 0) 1198 { 1199 double val = x>value; 1200 double tmp = C[GETI(ind)]*val; 1201 G_loss = tmp*b[ind]; 1202 H += tmp*val; 1203 } 1204 x++; 1205 } 1206 G_loss *= 2; 1207 1208 G = G_loss; 1209 H *= 2; 1210 H = max(H, 1e12); 1211 1212 double Gp = G+1; 1213 double Gn = G1; 1214 double violation = 0; 1215 if(w[j] == 0) 1216 { 1217 if(Gp < 0) 1218 violation = Gp; 1219 else if(Gn > 0) 1220 violation = Gn; 1221 else if(Gp>Gmax_old/l && Gn<Gmax_old/l) 1222 { 1223 active_size; 1224 swap(index[s], index[active_size]); 1225 s; 1226 continue; 1227 } 1228 } 1229 else if(w[j] > 0) 1230 violation = fabs(Gp); 1231 else 1232 violation = fabs(Gn); 1233 1234 Gmax_new = max(Gmax_new, violation); 1235 1236 // obtain Newton direction d 1237 if(Gp <= H*w[j]) 1238 d = Gp/H; 1239 else if(Gn >= H*w[j]) 1240 d = Gn/H; 1241 else 1242 d = w[j]; 1243 1244 if(fabs(d) < 1.0e12) 1245 continue; 1246 1247 double delta = fabs(w[j]+d)fabs(w[j]) + G*d; 1248 d_old = 0; 1249 int num_linesearch; 1250 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) 1251 { 1252 d_diff = d_old  d; 1253 cond = fabs(w[j]+d)fabs(w[j])  sigma*delta; 1254 1255 appxcond = xj_sq[j]*d*d + G_loss*d + cond; 1256 if(appxcond <= 0) 1257 { 1258 x = prob_col>x[j]; 1259 while(x>index != 1) 1260 { 1261 b[x>index1] += d_diff*x>value; 1262 x++; 1263 } 1264 break; 1265 } 1266 1267 if(num_linesearch == 0) 1268 { 1269 loss_old = 0; 1270 loss_new = 0; 1271 x = prob_col>x[j]; 1272 while(x>index != 1) 1273 { 1274 int ind = x>index1; 1275 if(b[ind] > 0) 1276 loss_old += C[GETI(ind)]*b[ind]*b[ind]; 1277 double b_new = b[ind] + d_diff*x>value; 1278 b[ind] = b_new; 1279 if(b_new > 0) 1280 loss_new += C[GETI(ind)]*b_new*b_new; 1281 x++; 1282 } 1283 } 1284 else 1285 { 1286 loss_new = 0; 1287 x = prob_col>x[j]; 1288 while(x>index != 1) 1289 { 1290 int ind = x>index1; 1291 double b_new = b[ind] + d_diff*x>value; 1292 b[ind] = b_new; 1293 if(b_new > 0) 1294 loss_new += C[GETI(ind)]*b_new*b_new; 1295 x++; 1296 } 1297 } 1298 1299 cond = cond + loss_new  loss_old; 1300 if(cond <= 0) 1301 break; 1302 else 1303 { 1304 d_old = d; 1305 d *= 0.5; 1306 delta *= 0.5; 1307 } 1308 } 1309 1310 w[j] += d; 1311 1312 // recompute b[] if line search takes too many steps 1313 if(num_linesearch >= max_num_linesearch) 1314 { 1315 info("#"); 1316 for(int i=0; i<l; i++) 1317 b[i] = 1; 1318 1319 for(int i=0; i<w_size; i++) 1320 { 1321 if(w[i]==0) continue; 1322 x = prob_col>x[i]; 1323 while(x>index != 1) 1324 { 1325 b[x>index1] = w[i]*x>value; 1326 x++; 1327 } 1328 } 1329 } 1330 } 1331 1332 if(iter == 0) 1333 Gmax_init = Gmax_new; 1334 iter++; 1335 if(iter % 10 == 0) 1336 info("."); 1337 1338 if(Gmax_new <= eps*Gmax_init) 1339 { 1340 if(active_size == w_size) 1341 break; 1342 else 1343 { 1344 active_size = w_size; 1345 info("*"); 1346 Gmax_old = INF; 1347 continue; 1348 } 1349 } 1350 1351 Gmax_old = Gmax_new; 1352 } 1353 1354 info("\noptimization finished, #iter = %d\n", iter); 1355 if(iter >= max_iter) 1356 info("\nWARNING: reaching max number of iterations\n"); 1357 1358 // calculate objective value 1359 1360 double v = 0; 1361 int nnz = 0; 1362 for(j=0; j<w_size; j++) 1363 { 1364 x = prob_col>x[j]; 1365 while(x>index != 1) 1366 { 1367 x>value *= prob_col>y[x>index1]; // restore x>value 1368 x++; 1369 } 1370 if(w[j] != 0) 1371 { 1372 v += fabs(w[j]); 1373 nnz++; 1374 } 1375 } 1376 for(j=0; j<l; j++) 1377 if(b[j] > 0) 1378 v += C[GETI(j)]*b[j]*b[j]; 1379 1380 info("Objective value = %lf\n", v); 1381 info("#nonzeros/#features = %d/%d\n", nnz, w_size); 1382 1383 delete [] index; 1384 delete [] y; 1385 delete [] b; 1386 delete [] xj_sq; 1387 } 1388 1389 // A coordinate descent algorithm for 1390 // L1regularized logistic regression problems 1391 // 1392 // min_w \sum wj + C \sum log(1+exp(yi w^T xi)), 1393 // 1394 // Given: 1395 // x, y, Cp, Cn 1396 // eps is the stopping tolerance 1397 // 1398 // solution will be put in w 1399 // 1400 // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008) 1401 1402 #undef GETI 1403 #define GETI(i) (y[i]+1) 1404 // To support weights for instances, use GETI(i) (i) 1405 1406 static void solve_l1r_lr( 1407 const problem *prob_col, double *w, double eps, 1408 double Cp, double Cn) 1409 { 1410 int l = prob_col>l; 1411 int w_size = prob_col>n; 1412 int j, s, iter = 0; 1413 int max_iter = 1000; 1414 int active_size = w_size; 1415 int max_num_linesearch = 20; 1416 1417 double x_min = 0; 1418 double sigma = 0.01; 1419 double d, G, H; 1420 double Gmax_old = INF; 1421 double Gmax_new; 1422 double Gmax_init; 1423 double sum1, appxcond1; 1424 double sum2, appxcond2; 1425 double cond; 1426 1427 int *index = new int[w_size]; 1428 schar *y = new schar[l]; 1429 double *exp_wTx = new double[l]; 1430 double *exp_wTx_new = new double[l]; 1431 double *xj_max = new double[w_size]; 1432 double *C_sum = new double[w_size]; 1433 double *xjneg_sum = new double[w_size]; 1434 double *xjpos_sum = new double[w_size]; 1435 feature_node *x; 1436 1437 double C[3] = {Cn,0,Cp}; 1438 1439 for(j=0; j<l; j++) 1440 { 1441 exp_wTx[j] = 1; 1442 if(prob_col>y[j] > 0) 1443 y[j] = 1; 1444 else 1445 y[j] = 1; 1446 } 1447 for(j=0; j<w_size; j++) 1448 { 1449 w[j] = 0; 1450 index[j] = j; 1451 xj_max[j] = 0; 1452 C_sum[j] = 0; 1453 xjneg_sum[j] = 0; 1454 xjpos_sum[j] = 0; 1455 x = prob_col>x[j]; 1456 while(x>index != 1) 1457 { 1458 int ind = x>index1; 1459 double val = x>value; 1460 x_min = min(x_min, val); 1461 xj_max[j] = max(xj_max[j], val); 1462 C_sum[j] += C[GETI(ind)]; 1463 if(y[ind] == 1) 1464 xjneg_sum[j] += C[GETI(ind)]*val; 1465 else 1466 xjpos_sum[j] += C[GETI(ind)]*val; 1467 x++; 1468 } 1469 } 1470 1471 while(iter < max_iter) 1472 { 1473 Gmax_new = 0; 1474 1475 for(j=0; j<active_size; j++) 1476 { 1477 int i = j+rand()%(active_sizej); 1478 swap(index[i], index[j]); 1479 } 1480 1481 for(s=0; s<active_size; s++) 1482 { 1483 j = index[s]; 1484 sum1 = 0; 1485 sum2 = 0; 1486 H = 0; 1487 1488 x = prob_col>x[j]; 1489 while(x>index != 1) 1490 { 1491 int ind = x>index1; 1492 double exp_wTxind = exp_wTx[ind]; 1493 double tmp1 = x>value/(1+exp_wTxind); 1494 double tmp2 = C[GETI(ind)]*tmp1; 1495 double tmp3 = tmp2*exp_wTxind; 1496 sum2 += tmp2; 1497 sum1 += tmp3; 1498 H += tmp1*tmp3; 1499 x++; 1500 } 1501 1502 G = sum2 + xjneg_sum[j]; 1503 1504 double Gp = G+1; 1505 double Gn = G1; 1506 double violation = 0; 1507 if(w[j] == 0) 1508 { 1509 if(Gp < 0) 1510 violation = Gp; 1511 else if(Gn > 0) 1512 violation = Gn; 1513 else if(Gp>Gmax_old/l && Gn<Gmax_old/l) 1514 { 1515 active_size; 1516 swap(index[s], index[active_size]); 1517 s; 1518 continue; 1519 } 1520 } 1521 else if(w[j] > 0) 1522 violation = fabs(Gp); 1523 else 1524 violation = fabs(Gn); 1525 1526 Gmax_new = max(Gmax_new, violation); 1527 1528 // obtain Newton direction d 1529 if(Gp <= H*w[j]) 1530 d = Gp/H; 1531 else if(Gn >= H*w[j]) 1532 d = Gn/H; 1533 else 1534 d = w[j]; 1535 1536 if(fabs(d) < 1.0e12) 1537 continue; 1538 1539 d = min(max(d,10.0),10.0); 1540 1541 double delta = fabs(w[j]+d)fabs(w[j]) + G*d; 1542 int num_linesearch; 1543 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) 1544 { 1545 cond = fabs(w[j]+d)fabs(w[j])  sigma*delta; 1546 1547 if(x_min >= 0) 1548 { 1549 double tmp = exp(d*xj_max[j]); 1550 appxcond1 = log(1+sum1*(tmp1)/xj_max[j]/C_sum[j])*C_sum[j] + cond  d*xjpos_sum[j]; 1551 appxcond2 = log(1+sum2*(1/tmp1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j]; 1552 if(min(appxcond1,appxcond2) <= 0) 1553 { 1554 x = prob_col>x[j]; 1555 while(x>index != 1) 1556 { 1557 exp_wTx[x>index1] *= exp(d*x>value); 1558 x++; 1559 } 1560 break; 1561 } 1562 } 1563 1564 cond += d*xjneg_sum[j]; 1565 1566 int i = 0; 1567 x = prob_col>x[j]; 1568 while(x>index != 1) 1569 { 1570 int ind = x>index1; 1571 double exp_dx = exp(d*x>value); 1572 exp_wTx_new[i] = exp_wTx[ind]*exp_dx; 1573 cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); 1574 x++; i++; 1575 } 1576 1577 if(cond <= 0) 1578 { 1579 int i = 0; 1580 x = prob_col>x[j]; 1581 while(x>index != 1) 1582 { 1583 int ind = x>index1; 1584 exp_wTx[ind] = exp_wTx_new[i]; 1585 x++; i++; 1586 } 1587 break; 1588 } 1589 else 1590 { 1591 d *= 0.5; 1592 delta *= 0.5; 1593 } 1594 } 1595 1596 w[j] += d; 1597 1598 // recompute exp_wTx[] if line search takes too many steps 1599 if(num_linesearch >= max_num_linesearch) 1600 { 1601 info("#"); 1602 for(int i=0; i<l; i++) 1603 exp_wTx[i] = 0; 1604 1605 for(int i=0; i<w_size; i++) 1606 { 1607 if(w[i]==0) continue; 1608 x = prob_col>x[i]; 1609 while(x>index != 1) 1610 { 1611 exp_wTx[x>index1] += w[i]*x>value; 1612 x++; 1613 } 1614 } 1615 1616 for(int i=0; i<l; i++) 1617 exp_wTx[i] = exp(exp_wTx[i]); 1618 } 1619 } 1620 1621 if(iter == 0) 1622 Gmax_init = Gmax_new; 1623 iter++; 1624 if(iter % 10 == 0) 1625 info("."); 1626 1627 if(Gmax_new <= eps*Gmax_init) 1628 { 1629 if(active_size == w_size) 1630 break; 1631 else 1632 { 1633 active_size = w_size; 1634 info("*"); 1635 Gmax_old = INF; 1636 continue; 1637 } 1638 } 1639 1640 Gmax_old = Gmax_new; 1641 } 1642 1643 info("\noptimization finished, #iter = %d\n", iter); 1644 if(iter >= max_iter) 1645 info("\nWARNING: reaching max number of iterations\n"); 1646 1647 // calculate objective value 1648 1649 double v = 0; 1650 int nnz = 0; 1651 for(j=0; j<w_size; j++) 1652 if(w[j] != 0) 1653 { 1654 v += fabs(w[j]); 1655 nnz++; 1656 } 1657 for(j=0; j<l; j++) 1658 if(y[j] == 1) 1659 v += C[GETI(j)]*log(1+1/exp_wTx[j]); 1660 else 1661 v += C[GETI(j)]*log(1+exp_wTx[j]); 1662 1663 info("Objective value = %lf\n", v); 1664 info("#nonzeros/#features = %d/%d\n", nnz, w_size); 1665 1666 delete [] index; 1667 delete [] y; 1668 delete [] exp_wTx; 1669 delete [] exp_wTx_new; 1670 delete [] xj_max; 1671 delete [] C_sum; 1672 delete [] xjneg_sum; 1673 delete [] xjpos_sum; 1674 } 1675 1676 // transpose matrix X from row format to column format 1677 static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col) 1678 { 1679 int i; 1680 int l = prob>l; 1681 int n = prob>n; 1682 int nnz = 0; 1683 int *col_ptr = new int[n+1]; 1684 feature_node *x_space; 1685 prob_col>l = l; 1686 prob_col>n = n; 1687 prob_col>y = new int[l]; 1688 prob_col>x = new feature_node*[n]; 1689 1690 for(i=0; i<l; i++) 1691 prob_col>y[i] = prob>y[i]; 1692 1693 for(i=0; i<n+1; i++) 1694 col_ptr[i] = 0; 1695 for(i=0; i<l; i++) 1696 { 1697 feature_node *x = prob>x[i]; 1698 while(x>index != 1) 1699 { 1700 nnz++; 1701 col_ptr[x>index]++; 1702 x++; 1703 } 1704 } 1705 for(i=1; i<n+1; i++) 1706 col_ptr[i] += col_ptr[i1] + 1; 1707 1708 x_space = new feature_node[nnz+n]; 1709 for(i=0; i<n; i++) 1710 prob_col>x[i] = &x_space[col_ptr[i]]; 1711 1712 for(i=0; i<l; i++) 1713 { 1714 feature_node *x = prob>x[i]; 1715 while(x>index != 1) 1716 { 1717 int ind = x>index1; 1718 x_space[col_ptr[ind]].index = i+1; // starts from 1 1719 x_space[col_ptr[ind]].value = x>value; 1720 col_ptr[ind]++; 1721 x++; 1722 } 1723 } 1724 for(i=0; i<n; i++) 1725 x_space[col_ptr[i]].index = 1; 1726 1727 *x_space_ret = x_space; 1728 1729 delete [] col_ptr; 1730 } 1731 634 1732 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data 635 1733 // perm, length l, must be allocated before calling this subroutine 636 void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)1734 static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm) 637 1735 { 638 1736 int l = prob>l; … … 643 1741 int *data_label = Malloc(int,l); 644 1742 int i; 645 1743 646 1744 for(i=0;i<l;i++) 647 1745 { … … 670 1768 } 671 1769 } 672 1770 673 1771 int *start = Malloc(int,nr_class); 674 1772 start[0] = 0; … … 683 1781 for(i=1;i<nr_class;i++) 684 1782 start[i] = start[i1]+count[i1]; 685 1783 686 1784 *nr_class_ret = nr_class; 687 1785 *label_ret = label; … … 691 1789 } 692 1790 693 void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)1791 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn) 694 1792 { 695 1793 double eps=param>eps; 696 1794 int pos = 0; 697 1795 int neg = 0; 698 for (int i=0;i<prob>l;i++)699 if 1796 for(int i=0;i<prob>l;i++) 1797 if(prob>y[i]==+1) 700 1798 pos++; 701 1799 neg = prob>l  pos; 702 703 function 1*fun_obj=NULL;1800 1801 function *fun_obj=NULL; 704 1802 switch(param>solver_type) 705 1803 { 706 case L2 _LR:707 { 708 fun_obj=new l2 _lr_fun(prob, Cp, Cn);1804 case L2R_LR: 1805 { 1806 fun_obj=new l2r_lr_fun(prob, Cp, Cn); 709 1807 TRON tron_obj(fun_obj, eps*min(pos,neg)/prob>l); 1808 tron_obj.set_print_string(liblinear_print_string); 710 1809 tron_obj.tron(w); 711 1810 delete fun_obj; 712 1811 break; 713 1812 } 714 case L2 LOSS_SVM:715 { 716 fun_obj=new l2 loss_svm_fun(prob, Cp, Cn);1813 case L2R_L2LOSS_SVC: 1814 { 1815 fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn); 717 1816 TRON tron_obj(fun_obj, eps*min(pos,neg)/prob>l); 1817 tron_obj.set_print_string(liblinear_print_string); 718 1818 tron_obj.tron(w); 719 1819 delete fun_obj; 720 1820 break; 721 1821 } 722 case L2 LOSS_SVM_DUAL:723 solve_l inear_c_svc(prob, w, eps, Cp, Cn, L2LOSS_SVM_DUAL);1822 case L2R_L2LOSS_SVC_DUAL: 1823 solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL); 724 1824 break; 725 case L1LOSS_SVM_DUAL: 726 solve_linear_c_svc(prob, w, eps, Cp, Cn, L1LOSS_SVM_DUAL); 1825 case L2R_L1LOSS_SVC_DUAL: 1826 solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); 1827 break; 1828 case L1R_L2LOSS_SVC: 1829 { 1830 problem prob_col; 1831 feature_node *x_space = NULL; 1832 transpose(prob, &x_space ,&prob_col); 1833 solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob>l, Cp, Cn); 1834 delete [] prob_col.y; 1835 delete [] prob_col.x; 1836 delete [] x_space; 1837 break; 1838 } 1839 case L1R_LR: 1840 { 1841 problem prob_col; 1842 feature_node *x_space = NULL; 1843 transpose(prob, &x_space ,&prob_col); 1844 solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob>l, Cp, Cn); 1845 delete [] prob_col.y; 1846 delete [] prob_col.x; 1847 delete [] x_space; 1848 break; 1849 } 1850 case L2R_LR_DUAL: 1851 solve_l2r_lr_dual(prob, w, eps, Cp, Cn); 727 1852 break; 728 1853 default: … … 733 1858 734 1859 // 735 // Interface function 1s1860 // Interface functions 736 1861 // 737 1862 model* train(const problem *prob, const parameter *param) 738 1863 { 739 int i ;1864 int i,j; 740 1865 int l = prob>l; 741 1866 int n = prob>n; 1867 int w_size = prob>n; 742 1868 model *model_ = Malloc(model,1); 743 1869 744 1870 if(prob>bias>=0) 745 1871 model_>nr_feature=n1; … … 748 1874 model_>param = *param; 749 1875 model_>bias = prob>bias; 750 1876 751 1877 int nr_class; 752 1878 int *label = NULL; … … 754 1880 int *count = NULL; 755 1881 int *perm = Malloc(int,l); 756 1882 757 1883 // group training data of the same class 758 1884 group_classes(prob,&nr_class,&label,&start,&count,perm); 759 1885 760 1886 model_>nr_class=nr_class; 761 1887 model_>label = Malloc(int,nr_class); 762 1888 for(i=0;i<nr_class;i++) 763 1889 model_>label[i] = label[i]; 764 1890 765 1891 // calculate weighted C 766 1892 double *weighted_C = Malloc(double, nr_class); … … 769 1895 for(i=0;i<param>nr_weight;i++) 770 1896 { 771 int j;772 1897 for(j=0;j<nr_class;j++) 773 1898 if(param>weight_label[i] == label[j]) 774 1899 break; 775 1900 if(j == nr_class) 776 fprintf(stderr," warning: class label %d specified in weight is not found\n", param>weight_label[i]);1901 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param>weight_label[i]); 777 1902 else 778 1903 weighted_C[j] *= param>weight[i]; 779 1904 } 780 1905 781 1906 // constructing the subproblem 782 1907 feature_node **x = Malloc(feature_node *,l); 783 1908 for(i=0;i<l;i++) 784 1909 x[i] = prob>x[perm[i]]; 785 1910 786 1911 int k; 787 1912 problem sub_prob; … … 790 1915 sub_prob.x = Malloc(feature_node *,sub_prob.l); 791 1916 sub_prob.y = Malloc(int,sub_prob.l); 792 1917 793 1918 for(k=0; k<sub_prob.l; k++) 794 1919 sub_prob.x[k] = x[k]; 795 796 if(nr_class==2) 797 { 798 model_>w=Malloc(double, n); 799 800 int e0 = start[0]+count[0]; 801 k=0; 802 for(; k<e0; k++) 803 sub_prob.y[k] = +1; 804 for(; k<sub_prob.l; k++) 805 sub_prob.y[k] = 1; 806 807 train_one(&sub_prob, param, &model_>w[0], weighted_C[0], weighted_C[1]); 1920 1921 // multiclass svm by Crammer and Singer 1922 if(param>solver_type == MCSVM_CS) 1923 { 1924 model_>w=Malloc(double, n*nr_class); 1925 for(i=0;i<nr_class;i++) 1926 for(j=start[i];j<start[i]+count[i];j++) 1927 sub_prob.y[j] = i; 1928 Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param>eps); 1929 Solver.Solve(model_>w); 808 1930 } 809 1931 else 810 1932 { 811 model_>w=Malloc(double, n*nr_class); 812 double *w=Malloc(double, n); 813 for(i=0;i<nr_class;i++) 814 { 815 int si = start[i]; 816 int ei = si+count[i]; 817 1933 if(nr_class == 2) 1934 { 1935 model_>w=Malloc(double, w_size); 1936 1937 int e0 = start[0]+count[0]; 818 1938 k=0; 819 for(; k<si; k++) 820 sub_prob.y[k] = 1; 821 for(; k<ei; k++) 1939 for(; k<e0; k++) 822 1940 sub_prob.y[k] = +1; 823 1941 for(; k<sub_prob.l; k++) 824 1942 sub_prob.y[k] = 1; 825 826 train_one(&sub_prob, param, w, weighted_C[i], param>C); 827 828 for(int j=0;j<n;j++) 829 model_>w[j*nr_class+i] = w[j]; 830 } 831 free(w); 832 } 833 1943 1944 train_one(&sub_prob, param, &model_>w[0], weighted_C[0], weighted_C[1]); 1945 } 1946 else 1947 { 1948 model_>w=Malloc(double, w_size*nr_class); 1949 double *w=Malloc(double, w_size); 1950 for(i=0;i<nr_class;i++) 1951 { 1952 int si = start[i]; 1953 int ei = si+count[i]; 1954 1955 k=0; 1956 for(; k<si; k++) 1957 sub_prob.y[k] = 1; 1958 for(; k<ei; k++) 1959 sub_prob.y[k] = +1; 1960 for(; k<sub_prob.l; k++) 1961 sub_prob.y[k] = 1; 1962 1963 train_one(&sub_prob, param, w, weighted_C[i], param>C); 1964 1965 for(int j=0;j<w_size;j++) 1966 model_>w[j*nr_class+i] = w[j]; 1967 } 1968 free(w); 1969 } 1970 1971 } 1972 834 1973 free(x); 835 1974 free(label); … … 843 1982 } 844 1983 845 void destroy_model(struct model *model_) 846 { 847 if(model_>w != NULL) 848 free(model_>w); 849 if(model_>label != NULL) 850 free(model_>label); 851 free(model_); 852 } 853 854 const char *solver_type_table[]= 855 { 856 "L2_LR", "L2LOSS_SVM_DUAL", "L2LOSS_SVM","L1LOSS_SVM_DUAL", NULL 1984 void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target) 1985 { 1986 int i; 1987 int *fold_start = Malloc(int,nr_fold+1); 1988 int l = prob>l; 1989 int *perm = Malloc(int,l); 1990 1991 for(i=0;i<l;i++) perm[i]=i; 1992 for(i=0;i<l;i++) 1993 { 1994 int j = i+rand()%(li); 1995 swap(perm[i],perm[j]); 1996 } 1997 for(i=0;i<=nr_fold;i++) 1998 fold_start[i]=i*l/nr_fold; 1999 2000 for(i=0;i<nr_fold;i++) 2001 { 2002 int begin = fold_start[i]; 2003 int end = fold_start[i+1]; 2004 int j,k; 2005 struct problem subprob; 2006 2007 subprob.bias = prob>bias; 2008 subprob.n = prob>n; 2009 subprob.l = l(endbegin); 2010 subprob.x = Malloc(struct feature_node*,subprob.l); 2011 subprob.y = Malloc(int,subprob.l); 2012 2013 k=0; 2014 for(j=0;j<begin;j++) 2015 { 2016 subprob.x[k] = prob>x[perm[j]]; 2017 subprob.y[k] = prob>y[perm[j]]; 2018 ++k; 2019 } 2020 for(j=end;j<l;j++) 2021 { 2022 subprob.x[k] = prob>x[perm[j]]; 2023 subprob.y[k] = prob>y[perm[j]]; 2024 ++k; 2025 } 2026 struct model *submodel = train(&subprob,param); 2027 for(j=begin;j<end;j++) 2028 target[perm[j]] = predict(submodel,prob>x[perm[j]]); 2029 free_and_destroy_model(&submodel); 2030 free(subprob.x); 2031 free(subprob.y); 2032 } 2033 free(fold_start); 2034 free(perm); 2035 } 2036 2037 int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) 2038 { 2039 int idx; 2040 int n; 2041 if(model_>bias>=0) 2042 n=model_>nr_feature+1; 2043 else 2044 n=model_>nr_feature; 2045 double *w=model_>w; 2046 int nr_class=model_>nr_class; 2047 int i; 2048 int nr_w; 2049 if(nr_class==2 && model_>param.solver_type != MCSVM_CS) 2050 nr_w = 1; 2051 else 2052 nr_w = nr_class; 2053 2054 const feature_node *lx=x; 2055 for(i=0;i<nr_w;i++) 2056 dec_values[i] = 0; 2057 for(; (idx=lx>index)!=1; lx++) 2058 { 2059 // the dimension of testing data may exceed that of training 2060 if(idx<=n) 2061 for(i=0;i<nr_w;i++) 2062 dec_values[i] += w[(idx1)*nr_w+i]*lx>value; 2063 } 2064 2065 if(nr_class==2) 2066 return (dec_values[0]>0)?model_>label[0]:model_>label[1]; 2067 else 2068 { 2069 int dec_max_idx = 0; 2070 for(i=1;i<nr_class;i++) 2071 { 2072 if(dec_values[i] > dec_values[dec_max_idx]) 2073 dec_max_idx = i; 2074 } 2075 return model_>label[dec_max_idx]; 2076 } 2077 } 2078 2079 int predict(const model *model_, const feature_node *x) 2080 { 2081 double *dec_values = Malloc(double, model_>nr_class); 2082 int label=predict_values(model_, x, dec_values); 2083 free(dec_values); 2084 return label; 2085 } 2086 2087 int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) 2088 { 2089 if(check_probability_model(model_)) 2090 { 2091 int i; 2092 int nr_class=model_>nr_class; 2093 int nr_w; 2094 if(nr_class==2) 2095 nr_w = 1; 2096 else 2097 nr_w = nr_class; 2098 2099 int label=predict_values(model_, x, prob_estimates); 2100 for(i=0;i<nr_w;i++) 2101 prob_estimates[i]=1/(1+exp(prob_estimates[i])); 2102 2103 if(nr_class==2) // for binary classification 2104 prob_estimates[1]=1.prob_estimates[0]; 2105 else 2106 { 2107 double sum=0; 2108 for(i=0; i<nr_class; i++) 2109 sum+=prob_estimates[i]; 2110 2111 for(i=0; i<nr_class; i++) 2112 prob_estimates[i]=prob_estimates[i]/sum; 2113 } 2114 2115 return label; 2116 } 2117 else 2118 return 0; 2119 } 2120 2121 static const char *solver_type_table[]= 2122 { 2123 "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS", 2124 "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL 857 2125 }; 858 2126 … … 863 2131 int n; 864 2132 const parameter& param = model_>param; 865 2133 866 2134 if(model_>bias>=0) 867 2135 n=nr_feature+1; 868 2136 else 869 2137 n=nr_feature; 2138 int w_size = n; 870 2139 FILE *fp = fopen(model_file_name,"w"); 871 2140 if(fp==NULL) return 1; 872 873 int nr_ classifier;874 if(model_>nr_class==2 )875 nr_ classifier=1;2141 2142 int nr_w; 2143 if(model_>nr_class==2 && model_>param.solver_type != MCSVM_CS) 2144 nr_w=1; 876 2145 else 877 nr_ classifier=model_>nr_class;878 2146 nr_w=model_>nr_class; 2147 879 2148 fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]); 880 2149 fprintf(fp, "nr_class %d\n", model_>nr_class); … … 883 2152 fprintf(fp, " %d", model_>label[i]); 884 2153 fprintf(fp, "\n"); 885 2154 886 2155 fprintf(fp, "nr_feature %d\n", nr_feature); 887 2156 888 2157 fprintf(fp, "bias %.16g\n", model_>bias); 889 2158 890 2159 fprintf(fp, "w\n"); 891 for(i=0; i< n; i++)2160 for(i=0; i<w_size; i++) 892 2161 { 893 2162 int j; 894 for(j=0; j<nr_ classifier; j++)895 fprintf(fp, "%.16g ", model_>w[i*nr_ classifier+j]);2163 for(j=0; j<nr_w; j++) 2164 fprintf(fp, "%.16g ", model_>w[i*nr_w+j]); 896 2165 fprintf(fp, "\n"); 897 2166 } 898 2167 899 2168 if (ferror(fp) != 0  fclose(fp) != 0) return 1; 900 2169 else return 0; … … 905 2174 FILE *fp = fopen(model_file_name,"r"); 906 2175 if(fp==NULL) return NULL; 907 2176 908 2177 int i; 909 2178 int nr_feature; … … 913 2182 model *model_ = Malloc(model,1); 914 2183 parameter& param = model_>param; 915 2184 916 2185 model_>label = NULL; 917 2186 918 2187 char cmd[81]; 919 2188 while(1) … … 973 2242 } 974 2243 } 975 2244 976 2245 nr_feature=model_>nr_feature; 977 2246 if(model_>bias>=0) … … 979 2248 else 980 2249 n=nr_feature; 981 982 int nr_ classifier;983 if(nr_class==2 )984 nr_ classifier= 1;2250 int w_size = n; 2251 int nr_w; 2252 if(nr_class==2 && param.solver_type != MCSVM_CS) 2253 nr_w = 1; 985 2254 else 986 nr_ classifier= nr_class;987 988 model_>w=Malloc(double, n*nr_classifier);989 for(i=0; i< n; i++)2255 nr_w = nr_class; 2256 2257 model_>w=Malloc(double, w_size*nr_w); 2258 for(i=0; i<w_size; i++) 990 2259 { 991 2260 int j; 992 for(j=0; j<nr_ classifier; j++)993 fscanf(fp, "%lf ", &model_>w[i*nr_ classifier+j]);2261 for(j=0; j<nr_w; j++) 2262 fscanf(fp, "%lf ", &model_>w[i*nr_w+j]); 994 2263 fscanf(fp, "\n"); 995 2264 } 996 2265 if (ferror(fp) != 0  fclose(fp) != 0) return NULL; 997 2266 998 2267 return model_; 999 2268 } 1000 2269 1001 int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) 1002 { 1003 int idx; 1004 int n; 1005 if(model_>bias>=0) 1006 n=model_>nr_feature+1; 1007 else 1008 n=model_>nr_feature; 1009 double *w=model_>w; 1010 int nr_class=model_>nr_class; 1011 int i; 1012 int nr_classifier; 1013 if(nr_class==2) 1014 nr_classifier = 1; 1015 else 1016 nr_classifier = nr_class; 1017 1018 const feature_node *lx=x; 1019 for(i=0;i<nr_classifier;i++) 1020 dec_values[i] = 0; 1021 for(; (idx=lx>index)!=1; lx++) 1022 { 1023 // the dimension of testing data may exceed that of training 1024 if(idx<=n) 1025 for(i=0;i<nr_classifier;i++) 1026 dec_values[i] += w[(idx1)*nr_classifier+i]*lx>value; 1027 } 1028 1029 if(nr_class==2) 1030 return (dec_values[0]>0)?model_>label[0]:model_>label[1]; 1031 else 1032 { 1033 int dec_max_idx = 0; 1034 for(i=1;i<nr_class;i++) 1035 { 1036 if(dec_values[i] > dec_values[dec_max_idx]) 1037 dec_max_idx = i; 1038 } 1039 return model_>label[dec_max_idx]; 1040 } 1041 } 1042 1043 int predict(const model *model_, const feature_node *x) 1044 { 1045 double *dec_values = Malloc(double, model_>nr_class); 1046 int label=predict_values(model_, x, dec_values); 1047 free(dec_values); 1048 return label; 1049 } 1050 1051 int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) 1052 { 1053 if(model_>param.solver_type==L2_LR) 1054 { 1055 int i; 1056 int nr_class=model_>nr_class; 1057 int nr_classifier; 1058 if(nr_class==2) 1059 nr_classifier = 1; 1060 else 1061 nr_classifier = nr_class; 1062 1063 int label=predict_values(model_, x, prob_estimates); 1064 for(i=0;i<nr_classifier;i++) 1065 prob_estimates[i]=1/(1+exp(prob_estimates[i])); 1066 1067 if(nr_class==2) // for binary classification 1068 prob_estimates[1]=1.prob_estimates[0]; 1069 else 1070 { 1071 double sum=0; 1072 for(i=0; i<nr_class; i++) 1073 sum+=prob_estimates[i]; 1074 1075 for(i=0; i<nr_class; i++) 1076 prob_estimates[i]=prob_estimates[i]/sum; 1077 } 1078 1079 return label; 1080 } 1081 else 1082 return 0; 2270 int get_nr_feature(const model *model_) 2271 { 2272 return model_>nr_feature; 2273 } 2274 2275 int get_nr_class(const model *model_) 2276 { 2277 return model_>nr_class; 2278 } 2279 2280 void get_labels(const model *model_, int* label) 2281 { 2282 if (model_>label != NULL) 2283 for(int i=0;i<model_>nr_class;i++) 2284 label[i] = model_>label[i]; 2285 } 2286 2287 void free_model_content(struct model *model_ptr) 2288 { 2289 if(model_ptr>w != NULL) 2290 free(model_ptr>w); 2291 if(model_ptr>label != NULL) 2292 free(model_ptr>label); 2293 } 2294 2295 void free_and_destroy_model(struct model **model_ptr_ptr) 2296 { 2297 struct model *model_ptr = *model_ptr_ptr; 2298 if(model_ptr != NULL) 2299 { 2300 free_model_content(model_ptr); 2301 free(model_ptr); 2302 } 1083 2303 } 1084 2304 … … 1095 2315 if(param>eps <= 0) 1096 2316 return "eps <= 0"; 1097 2317 1098 2318 if(param>C <= 0) 1099 2319 return "C <= 0"; 1100 1101 if(param>solver_type != L2_LR 1102 && param>solver_type != L2LOSS_SVM_DUAL 1103 && param>solver_type != L2LOSS_SVM 1104 && param>solver_type != L1LOSS_SVM_DUAL) 2320 2321 if(param>solver_type != L2R_LR 2322 && param>solver_type != L2R_L2LOSS_SVC_DUAL 2323 && param>solver_type != L2R_L2LOSS_SVC 2324 && param>solver_type != L2R_L1LOSS_SVC_DUAL 2325 && param>solver_type != MCSVM_CS 2326 && param>solver_type != L1R_L2LOSS_SVC 2327 && param>solver_type != L1R_LR 2328 && param>solver_type != L2R_LR_DUAL) 1105 2329 return "unknown solver type"; 1106 1107 // if(param>solver_type == L1_LR) 1108 // return "sorry! sover_type = 1 (L1_LR) is not supported yet"; 1109 2330 1110 2331 return NULL; 1111 2332 } 1112 2333 1113 void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target) 1114 { 1115 int i; 1116 int *fold_start = Malloc(int,nr_fold+1); 1117 int l = prob>l; 1118 int *perm = Malloc(int,l); 1119 1120 for(i=0;i<l;i++) perm[i]=i; 1121 for(i=0;i<l;i++) 1122 { 1123 int j = i+rand()%(li); 1124 swap(perm[i],perm[j]); 1125 } 1126 for(i=0;i<=nr_fold;i++) 1127 fold_start[i]=i*l/nr_fold; 1128 1129 for(i=0;i<nr_fold;i++) 1130 { 1131 int begin = fold_start[i]; 1132 int end = fold_start[i+1]; 1133 int j,k; 1134 struct problem subprob; 1135 1136 subprob.bias = prob>bias; 1137 subprob.n = prob>n; 1138 subprob.l = l(endbegin); 1139 subprob.x = Malloc(struct feature_node*,subprob.l); 1140 subprob.y = Malloc(int,subprob.l); 1141 1142 k=0; 1143 for(j=0;j<begin;j++) 1144 { 1145 subprob.x[k] = prob>x[perm[j]]; 1146 subprob.y[k] = prob>y[perm[j]]; 1147 ++k; 1148 } 1149 for(j=end;j<l;j++) 1150 { 1151 subprob.x[k] = prob>x[perm[j]]; 1152 subprob.y[k] = prob>y[perm[j]]; 1153 ++k; 1154 } 1155 struct model *submodel = train(&subprob,param); 1156 for(j=begin;j<end;j++) 1157 target[perm[j]] = predict(submodel,prob>x[perm[j]]); 1158 destroy_model(submodel); 1159 free(subprob.x); 1160 free(subprob.y); 1161 } 1162 free(fold_start); 1163 free(perm); 1164 } 1165 1166 int get_nr_feature(const model *model_) 1167 { 1168 return model_>nr_feature; 1169 } 1170 1171 int get_nr_class(const model *model_) 1172 { 1173 return model_>nr_class; 1174 } 1175 1176 void get_labels(const model *model_, int* label) 1177 { 1178 if (model_>label != NULL) 1179 for(int i=0;i<model_>nr_class;i++) 1180 label[i] = model_>label[i]; 1181 } 2334 int check_probability_model(const struct model *model_) 2335 { 2336 return (model_>param.solver_type==L2R_LR  2337 model_>param.solver_type==L2R_LR_DUAL  2338 model_>param.solver_type==L1R_LR); 2339 } 2340 2341 void set_print_string_function(void (*print_func)(const char*)) 2342 { 2343 if (print_func == NULL) 2344 liblinear_print_string = &print_string_stdout; 2345 else 2346 liblinear_print_string = print_func; 2347 } 2348 1182 2349 1183 2350 /* … … 1196 2363 #if _MSC_VER!=0 && _MSC_VER<1300 1197 2364 #ifndef min 1198 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }2365 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; } 1199 2366 #endif 1200 2367 1201 2368 #ifndef max 1202 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }2369 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; } 1203 2370 #endif 1204 2371 #endif … … 1207 2374 extern "C" { 1208 2375 #endif 1209 1210 extern double dnrm2_(int *, double *, int *);1211 extern double ddot_(int *, double *, int *, double *, int *);1212 extern int daxpy_(int *, double *, double *, int *, double *, int *);1213 extern int dscal_(int *, double *, double *, int *);1214 2376 2377 extern double dnrm2_(int *, double *, int *); 2378 extern double ddot_(int *, double *, int *, double *, int *); 2379 extern int daxpy_(int *, double *, double *, int *, double *, int *); 2380 extern int dscal_(int *, double *, double *, int *); 2381 1215 2382 #ifdef __cplusplus 1216 2383 } 1217 2384 #endif 1218 2385 1219 1220 TRON::TRON(const function1 *fun_obj, double eps, int max_iter) 1221 { 1222 this>fun_obj=const_cast<function1 *>(fun_obj); 2386 static void default_print(const char *buf) 2387 { 2388 fputs(buf,stdout); 2389 fflush(stdout); 2390 } 2391 2392 void TRON::info(const char *fmt,...) 2393 { 2394 char buf[BUFSIZ]; 2395 va_list ap; 2396 va_start(ap,fmt); 2397 vsprintf(buf,fmt,ap); 2398 va_end(ap); 2399 (*tron_print_string)(buf); 2400 } 2401 2402 TRON::TRON(const function *fun_obj, double eps, int max_iter) 2403 { 2404 this>fun_obj=const_cast<function *>(fun_obj); 1223 2405 this>eps=eps; 1224 2406 this>max_iter=max_iter; 2407 tron_print_string = default_print; 1225 2408 } 1226 2409 … … 1233 2416 // Parameters for updating the iterates. 1234 2417 double eta0 = 1e4, eta1 = 0.25, eta2 = 0.75; 1235 2418 1236 2419 // Parameters for updating the trust region size delta. 1237 2420 double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; 1238 2421 1239 2422 int n = fun_obj>get_nr_variable(); 1240 2423 int i, cg_iter; … … 1246 2429 double *w_new = new double[n]; 1247 2430 double *g = new double[n]; 1248 2431 1249 2432 for (i=0; i<n; i++) 1250 2433 w[i] = 0; 1251 1252 2434 2435 f = fun_obj>fun(w); 1253 2436 fun_obj>grad(w, g); 1254 2437 delta = dnrm2_(&n, g, &inc); 1255 2438 double gnorm1 = delta; 1256 2439 double gnorm = gnorm1; 1257 2440 1258 2441 if (gnorm <= eps*gnorm1) 1259 2442 search = 0; 1260 2443 1261 2444 iter = 1; 1262 2445 1263 2446 while (iter <= max_iter && search) 1264 2447 { 1265 2448 cg_iter = trcg(delta, g, s, r); 1266 2449 1267 2450 memcpy(w_new, w, sizeof(double)*n); 1268 2451 daxpy_(&n, &one, s, &inc, w_new, &inc); 1269 2452 1270 2453 gs = ddot_(&n, g, &inc, s, &inc); 1271 2454 prered = 0.5*(gsddot_(&n, s, &inc, r, &inc)); 1272 1273 2455 fnew = fun_obj>fun(w_new); 2456 1274 2457 // Compute the actual reduction. 1275 1276 2458 actred = f  fnew; 2459 1277 2460 // On the first iteration, adjust the initial step bound. 1278 2461 snorm = dnrm2_(&n, s, &inc); 1279 2462 if (iter == 1) 1280 2463 delta = min(delta, snorm); 1281 2464 1282 2465 // Compute prediction alpha*snorm of the step. 1283 2466 if (fnew  f  gs <= 0) … … 1285 2468 else 1286 2469 alpha = max(sigma1, 0.5*(gs/(fnew  f  gs))); 1287 2470 1288 2471 // Update the trust region bound according to the ratio of actual to predicted reduction. 1289 2472 if (actred < eta0*prered) … … 1295 2478 else 1296 2479 delta = max(delta, min(alpha*snorm, sigma3*delta)); 1297 1298 //printf("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e g %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);1299 2480 2481 info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e g %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); 2482 1300 2483 if (actred > eta0*prered) 1301 2484 { … … 1303 2486 memcpy(w, w_new, sizeof(double)*n); 1304 2487 f = fnew; 1305 1306 2488 fun_obj>grad(w, g); 2489 1307 2490 gnorm = dnrm2_(&n, g, &inc); 1308 2491 if (gnorm <= eps*gnorm1) … … 1311 2494 if (f < 1.0e+32) 1312 2495 { 1313 printf("warning: f < 1.0e+32\n");2496 info("warning: f < 1.0e+32\n"); 1314 2497 break; 1315 2498 } 1316 2499 if (fabs(actred) <= 0 && prered <= 0) 1317 2500 { 1318 printf("warning: actred and prered <= 0\n");2501 info("warning: actred and prered <= 0\n"); 1319 2502 break; 1320 2503 } … … 1322 2505 fabs(prered) <= 1.0e12*fabs(f)) 1323 2506 { 1324 printf("warning: actred and prered too small\n");2507 info("warning: actred and prered too small\n"); 1325 2508 break; 1326 2509 } 1327 2510 } 1328 2511 1329 2512 delete[] g; 1330 2513 delete[] r; … … 1341 2524 double *Hd = new double[n]; 1342 2525 double rTr, rnewTrnew, alpha, beta, cgtol; 1343 2526 1344 2527 for (i=0; i<n; i++) 1345 2528 { … … 1349 2532 } 1350 2533 cgtol = 0.1*dnrm2_(&n, g, &inc); 1351 2534 1352 2535 int cg_iter = 0; 1353 2536 rTr = ddot_(&n, r, &inc, r, &inc); … … 1358 2541 cg_iter++; 1359 2542 fun_obj>Hv(d, Hd); 1360 2543 1361 2544 alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); 1362 2545 daxpy_(&n, &alpha, d, &inc, s, &inc); 1363 2546 if (dnrm2_(&n, s, &inc) > delta) 1364 2547 { 1365 //printf("cg reaches trust region boundary\n");2548 info("cg reaches trust region boundary\n"); 1366 2549 alpha = alpha; 1367 2550 daxpy_(&n, &alpha, d, &inc, s, &inc); 1368 2551 1369 2552 double std = ddot_(&n, s, &inc, d, &inc); 1370 2553 double sts = ddot_(&n, s, &inc, s, &inc); … … 1389 2572 rTr = rnewTrnew; 1390 2573 } 1391 2574 1392 2575 delete[] d; 1393 2576 delete[] Hd; 1394 2577 1395 2578 return(cg_iter); 1396 2579 } … … 1405 2588 } 1406 2589 2590 void TRON::set_print_string(void (*print_string) (const char *buf)) 2591 { 2592 tron_print_string = print_string; 2593 } 2594 1407 2595 1408 2596 /* 1409 The folowing load save function 1s are used for orange pickling2597 The folowing load save functions are used for orange pickling 1410 2598 */ 1411 2599 … … 1425 2613 1426 2614 int nr_classifier; 1427 if(model_>nr_class==2 )2615 if(model_>nr_class==2 && model_>param.solver_type != MCSVM_CS) 1428 2616 nr_classifier=1; 1429 2617 else … … 1543 2731 1544 2732 int nr_classifier; 1545 if(nr_class==2 )2733 if(nr_class==2 && param.solver_type != MCSVM_CS) 1546 2734 nr_classifier = 1; 1547 2735 else … … 1659 2847 } 1660 2848 2849 static void dont_print_string(const char *s){} 2850 1661 2851 TLinearLearner::TLinearLearner(){ 1662 solver_type = L2 _LR;2852 solver_type = L2R_LR; 1663 2853 eps = 0.01f; 1664 2854 C=1; 2855 set_print_string_function(&dont_print_string); 1665 2856 } 1666 2857 … … 1697 2888 domain = examples>domain; 1698 2889 indexMap = _indexMap; 1699 computesProbabilities = linmodel>param.solver_type == L2_LR;1700 int nr_classifier = (linmodel>nr_class==2 )? 1 : linmodel>nr_class;2890 computesProbabilities = check_probability_model(linmodel); 2891 int nr_classifier = (linmodel>nr_class==2 && linmodel>param.solver_type != MCSVM_CS)? 1 : linmodel>nr_class; 1701 2892 weights = mlnew TFloatListList(nr_classifier); 1702 2893 for (int i=0; i<nr_classifier; i++){ … … 1709 2900 TLinearClassifier::~TLinearClassifier(){ 1710 2901 if (linmodel) 1711 destroy_model(linmodel);2902 free_and_destroy_model(&linmodel); 1712 2903 if (indexMap) 1713 2904 delete indexMap; 
source/orange/linear.hpp
r6796 r7812 1 1 /* 2 Copyright (c) 20072008 The LIBLINEAR Project. 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions 7 are met: 8 9 1. Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither name of copyright holders nor the names of its contributors 17 may be used to endorse or promote products derived from this software 18 without specific prior written permission. 19 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2 3 Copyright (c) 20072010 The LIBLINEAR Project. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 10 1. Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 13 2. Redistributions in binary form must reproduce the above copyright 14 notice, this list of conditions and the following disclaimer in the 15 documentation and/or other materials provided with the distribution. 16 17 3. Neither name of copyright holders nor the names of its contributors 18 may be used to endorse or promote products derived from this software 19 without specific prior written permission. 20 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 32 34 */ 33 35 … … 40 42 extern "C" { 41 43 #endif 42 43 struct feature_node 44 { 45 int index; 46 double value; 47 }; 48 49 struct problem 50 { 51 int l, n; 52 int *y; 53 struct feature_node **x; 54 double bias; /* < 0 if no bias term */ 55 }; 56 57 enum { L2_LR, L2LOSS_SVM_DUAL, L2LOSS_SVM, L1LOSS_SVM_DUAL }; /* solver_type */ 58 59 struct parameter 60 { 61 int solver_type; 62 63 /* these are for training only */ 64 double eps; /* stopping criteria */ 65 double C; 66 int nr_weight; 67 int *weight_label; 68 double* weight; 69 }; 70 71 struct model 72 { 73 struct parameter param; 74 int nr_class; /* number of classes */ 75 int nr_feature; 76 double *w; 77 int *label; /* label of each class (label[n]) */ 78 double bias; 79 }; 80 81 struct model* train(const struct problem *prob, const struct parameter *param); 82 void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, int *target); 83 84 int predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); 85 int predict(const struct model *model_, const struct feature_node *x); 86 int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); 87 88 int save_model(const char *model_file_name, const struct model *model_); 89 struct model *load_model(const char *model_file_name); 90 91 int get_nr_feature(const struct model *model_); 92 int get_nr_class(const struct model *model_); 93 void get_labels(const struct model *model_, int* label); 94 95 void destroy_model(struct model *model_); 96 void destroy_param(struct parameter *param); 97 const char *check_parameter(const struct problem *prob, const struct parameter *param); 98 44 45 struct feature_node 46 { 47 int index; 48 double value; 49 }; 50 51 struct problem 52 { 53 int l, n; 54 int *y; 55 struct feature_node **x; 56 double bias; /* < 0 if no bias term */ 57 }; 58 59 enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL }; /* solver_type */ 60 61 struct parameter 62 { 63 int solver_type; 64 65 /* these are for training only */ 66 double eps; /* stopping criteria */ 67 double C; 68 int nr_weight; 69 int *weight_label; 70 double* weight; 71 }; 72 73 struct model 74 { 75 struct parameter param; 76 int nr_class; /* number of classes */ 77 int nr_feature; 78 double *w; 79 int *label; /* label of each class */ 80 double bias; 81 }; 82 83 struct model* train(const struct problem *prob, const struct parameter *param); 84 void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, int *target); 85 86 int predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); 87 int predict(const struct model *model_, const struct feature_node *x); 88 int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); 89 90 int save_model(const char *model_file_name, const struct model *model_); 91 struct model *load_model(const char *model_file_name); 92 93 int get_nr_feature(const struct model *model_); 94 int get_nr_class(const struct model *model_); 95 void get_labels(const struct model *model_, int* label); 96 97 void free_model_content(struct model *model_ptr); 98 void free_and_destroy_model(struct model **model_ptr_ptr); 99 void destroy_param(struct parameter *param); 100 101 const char *check_parameter(const struct problem *prob, const struct parameter *param); 102 int check_probability_model(const struct model *model); 103 void set_print_string_function(void (*print_func) (const char*)); 104 99 105 #ifdef __cplusplus 100 106 } … … 106 112 #define _TRON_H 107 113 108 class function 1114 class function 109 115 { 110 116 public: … … 112 118 virtual void grad(double *w, double *g) = 0 ; 113 119 virtual void Hv(double *s, double *Hs) = 0 ; 114 120 115 121 virtual int get_nr_variable(void) = 0 ; 116 virtual ~function 1(void){}122 virtual ~function(void){} 117 123 }; 118 124 … … 120 126 { 121 127 public: 122 TRON(const function 1*fun_obj, double eps = 0.1, int max_iter = 1000);128 TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); 123 129 ~TRON(); 124 130 125 131 void tron(double *w); 126 132 void set_print_string(void (*i_print) (const char *buf)); 133 127 134 private: 128 135 int trcg(double delta, double *g, double *s, double *r); 129 136 double norm_inf(int n, double *x); 130 137 131 138 double eps; 132 139 int max_iter; 133 function1 *fun_obj; 134 }; 135 136 #endif /* _TRON_H */ 140 function *fun_obj; 141 void info(const char *fmt,...); 142 void (*tron_print_string)(const char *buf); 143 }; 144 #endif 145 137 146 138 147 #ifndef LINEAR_HPP … … 157 166 __REGISTER_CLASS 158 167 159 CLASSCONSTANTS(Lossfunction1) enum {L2_LR, L2Loss_SVM_Dual, L2Loss_SVM, L1Loss_SVM_Dual }; 168 CLASSCONSTANTS(Lossfunction1_old_) enum {L2_LR, L2Loss_SVM_Dual, L2Loss_SVM, L1Loss_SVM_Dual }; //For backwards compatibility with 1.4 version. 169 CLASSCONSTANTS(Lossfunction1) enum {L2R_LR, L2R_L2Loss_SVC_Dual, L2R_L2Loss_SVC, L2R_L1Loss_SVC_Dual, MCSVM_CS, L1R_L2Loss_SVC, L1R_LR, L2R_LR_Dual}; 170 CLASSCONSTANTS(LIBLINEAR_VERSION: VERSION=170) 160 171 161 172 int solver_type; //P(&LinearLearner_Lossfunction1) Solver type (loss function1)
Note: See TracChangeset
for help on using the changeset viewer.