Changeset 11670:d967a4ceb8b4 in orange for source/orange/liblinear/linear.cpp
 Timestamp:
 08/22/13 20:23:29 (8 months ago)
 Branch:
 default
 committer:
 426ac3b6726e204573736572203c626a6f65726e2e657373657240676d61696c2e636f6d3e2031333737323834343333202d37323030
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

source/orange/liblinear/linear.cpp
r8978 r11670 4 4 #include <string.h> 5 5 #include <stdarg.h> 6 #include <locale.h> 6 7 #include "linear.h" 7 8 #include "tron.h" … … 15 16 #endif 16 17 template <class S, class T> static inline void clone(T*& dst, S* src, int n) 17 { 18 { 18 19 dst = new T[n]; 19 20 memcpy((void *)dst,(void *)src,sizeof(T)*n); … … 44 45 #endif 45 46 46 class l2r_lr_fun 47 class l2r_lr_fun: public function 47 48 { 48 49 public: 49 l2r_lr_fun(const problem *prob, double Cp, double Cn);50 l2r_lr_fun(const problem *prob, double *C); 50 51 ~l2r_lr_fun(); 51 52 … … 66 67 }; 67 68 68 l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn) 69 { 70 int i; 69 l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C) 70 { 71 71 int l=prob>l; 72 int *y=prob>y;73 72 74 73 this>prob = prob; … … 76 75 z = new double[l]; 77 76 D = new double[l]; 78 C = new double[l]; 79 80 for (i=0; i<l; i++) 81 { 82 if (y[i] == 1) 83 C[i] = Cp; 84 else 85 C[i] = Cn; 86 } 77 this>C = C; 87 78 } 88 79 … … 91 82 delete[] z; 92 83 delete[] D; 93 delete[] C;94 84 } 95 85 … … 99 89 int i; 100 90 double f=0; 101 int*y=prob>y;91 double *y=prob>y; 102 92 int l=prob>l; 103 93 int w_size=get_nr_variable(); 104 94 105 95 Xv(w, z); 96 97 for(i=0;i<w_size;i++) 98 f += w[i]*w[i]; 99 f /= 2.0; 106 100 for(i=0;i<l;i++) 107 101 { … … 112 106 f += C[i]*(yz+log(1 + exp(yz))); 113 107 } 114 f = 2*f;115 for(i=0;i<w_size;i++)116 f += w[i]*w[i];117 f /= 2.0;118 108 119 109 return(f); … … 123 113 { 124 114 int i; 125 int*y=prob>y;115 double *y=prob>y; 126 116 int l=prob>l; 127 117 int w_size=get_nr_variable(); … … 199 189 } 200 190 201 class l2r_l2_svc_fun 191 class l2r_l2_svc_fun: public function 202 192 { 203 193 public: 204 l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);194 l2r_l2_svc_fun(const problem *prob, double *C); 205 195 ~l2r_l2_svc_fun(); 206 196 … … 211 201 int get_nr_variable(void); 212 202 213 pr ivate:203 protected: 214 204 void Xv(double *v, double *Xv); 215 205 void subXv(double *v, double *Xv); … … 224 214 }; 225 215 226 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn) 227 { 228 int i; 216 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C) 217 { 229 218 int l=prob>l; 230 int *y=prob>y;231 219 232 220 this>prob = prob; … … 234 222 z = new double[l]; 235 223 D = new double[l]; 236 C = new double[l];237 224 I = new int[l]; 238 239 for (i=0; i<l; i++) 240 { 241 if (y[i] == 1) 242 C[i] = Cp; 243 else 244 C[i] = Cn; 245 } 225 this>C = C; 246 226 } 247 227 … … 250 230 delete[] z; 251 231 delete[] D; 252 delete[] C;253 232 delete[] I; 254 233 } … … 258 237 int i; 259 238 double f=0; 260 int*y=prob>y;239 double *y=prob>y; 261 240 int l=prob>l; 262 241 int w_size=get_nr_variable(); 263 242 264 243 Xv(w, z); 244 245 for(i=0;i<w_size;i++) 246 f += w[i]*w[i]; 247 f /= 2.0; 265 248 for(i=0;i<l;i++) 266 249 { … … 270 253 f += C[i]*d*d; 271 254 } 272 f = 2*f;273 for(i=0;i<w_size;i++)274 f += w[i]*w[i];275 f /= 2.0;276 255 277 256 return(f); … … 281 260 { 282 261 int i; 283 int*y=prob>y;262 double *y=prob>y; 284 263 int l=prob>l; 285 264 int w_size=get_nr_variable(); … … 307 286 { 308 287 int i; 309 int l=prob>l;310 288 int w_size=get_nr_variable(); 311 double *wa = new double[ l];289 double *wa = new double[sizeI]; 312 290 313 291 subXv(s, wa); … … 373 351 } 374 352 } 353 } 354 355 class l2r_l2_svr_fun: public l2r_l2_svc_fun 356 { 357 public: 358 l2r_l2_svr_fun(const problem *prob, double *C, double p); 359 360 double fun(double *w); 361 void grad(double *w, double *g); 362 363 private: 364 double p; 365 }; 366 367 l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p): 368 l2r_l2_svc_fun(prob, C) 369 { 370 this>p = p; 371 } 372 373 double l2r_l2_svr_fun::fun(double *w) 374 { 375 int i; 376 double f=0; 377 double *y=prob>y; 378 int l=prob>l; 379 int w_size=get_nr_variable(); 380 double d; 381 382 Xv(w, z); 383 384 for(i=0;i<w_size;i++) 385 f += w[i]*w[i]; 386 f /= 2; 387 for(i=0;i<l;i++) 388 { 389 d = z[i]  y[i]; 390 if(d < p) 391 f += C[i]*(d+p)*(d+p); 392 else if(d > p) 393 f += C[i]*(dp)*(dp); 394 } 395 396 return(f); 397 } 398 399 void l2r_l2_svr_fun::grad(double *w, double *g) 400 { 401 int i; 402 double *y=prob>y; 403 int l=prob>l; 404 int w_size=get_nr_variable(); 405 double d; 406 407 sizeI = 0; 408 for(i=0;i<l;i++) 409 { 410 d = z[i]  y[i]; 411 412 // generate index set I 413 if(d < p) 414 { 415 z[sizeI] = C[i]*(d+p); 416 I[sizeI] = i; 417 sizeI++; 418 } 419 else if(d > p) 420 { 421 z[sizeI] = C[i]*(dp); 422 I[sizeI] = i; 423 sizeI++; 424 } 425 426 } 427 subXTv(z, g); 428 429 for(i=0;i<w_size;i++) 430 g[i] = w[i] + 2*g[i]; 375 431 } 376 432 … … 395 451 // See Appendix of LIBLINEAR paper, Fan et al. (2008) 396 452 397 #define GETI(i) ( prob>y[i])453 #define GETI(i) ((int) prob>y[i]) 398 454 // To support weights for instances, use GETI(i) (i) 399 455 … … 456 512 for(r=1;r<active_i && beta<r*D[r];r++) 457 513 beta += D[r]; 458 459 514 beta /= r; 515 460 516 for(r=0;r<active_i;r++) 461 517 { … … 494 550 double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking 495 551 bool start_from_all = true; 496 // initial 552 553 // Initial alpha can be set here. Note that 554 // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l1 555 // alpha[i*nr_class+m] <= C[GETI(i)] if prob>y[i] == m 556 // alpha[i*nr_class+m] <= 0 if prob>y[i] != m 557 // If initial alpha isn't zero, uncomment the for loop below to initialize w 497 558 for(i=0;i<l*nr_class;i++) 498 559 alpha[i] = 0; 560 499 561 for(i=0;i<w_size*nr_class;i++) 500 w[i] = 0; 562 w[i] = 0; 501 563 for(i=0;i<l;i++) 502 564 { … … 507 569 while(xi>index != 1) 508 570 { 509 QD[i] += (xi>value)*(xi>value); 571 double val = xi>value; 572 QD[i] += val*val; 573 574 // Uncomment the for loop if initial alpha isn't zero 575 // for(m=0; m<nr_class; m++) 576 // w[(xi>index1)*nr_class+m] += alpha[i*nr_class+m]*val; 510 577 xi++; 511 578 } 512 579 active_size_i[i] = nr_class; 513 y_index[i] = prob>y[i];580 y_index[i] = (int)prob>y[i]; 514 581 index[i] = i; 515 582 } 516 583 517 while(iter < max_iter) 584 while(iter < max_iter) 518 585 { 519 586 double stopping = INF; … … 556 623 } 557 624 if(y_index[i] < active_size_i[i]) 558 if(alpha_i[ prob>y[i]] < C[GETI(i)] && G[y_index[i]] < minG)625 if(alpha_i[(int) prob>y[i]] < C[GETI(i)] && G[y_index[i]] < minG) 559 626 minG = G[y_index[i]]; 560 627 … … 566 633 while(active_size_i[i]>m) 567 634 { 568 if(!be_shrunk(i, active_size_i[i], y_index[i], 635 if(!be_shrunk(i, active_size_i[i], y_index[i], 569 636 alpha_i[alpha_index_i[active_size_i[i]]], minG)) 570 637 { … … 573 640 if(y_index[i] == active_size_i[i]) 574 641 y_index[i] = m; 575 else if(y_index[i] == m) 642 else if(y_index[i] == m) 576 643 y_index[i] = active_size_i[i]; 577 644 break; … … 586 653 active_size; 587 654 swap(index[s], index[active_size]); 588 s; 655 s; 589 656 continue; 590 657 } … … 664 731 } 665 732 for(i=0;i<l;i++) 666 v = alpha[i*nr_class+ prob>y[i]];733 v = alpha[i*nr_class+(int)prob>y[i]]; 667 734 info("Objective value = %lf\n",v); 668 735 info("nSV = %d\n",nSV); … … 683 750 // 684 751 // min_\alpha 0.5(\alpha^T (Q + D)\alpha)  e^T \alpha, 685 // s.t. 0 <= alpha_i <= upper_bound_i,752 // s.t. 0 <= \alpha_i <= upper_bound_i, 686 753 // 687 754 // where Qij = yi yj xi^T xj and … … 710 777 711 778 static void solve_l2r_l1l2_svc( 712 const problem *prob, double *w, double eps, 779 const problem *prob, double *w, double eps, 713 780 double Cp, double Cn, int solver_type) 714 781 { … … 741 808 } 742 809 810 for(i=0; i<l; i++) 811 { 812 if(prob>y[i] > 0) 813 { 814 y[i] = +1; 815 } 816 else 817 { 818 y[i] = 1; 819 } 820 } 821 822 // Initial alpha can be set here. Note that 823 // 0 <= alpha[i] <= upper_bound[GETI(i)] 824 for(i=0; i<l; i++) 825 alpha[i] = 0; 826 743 827 for(i=0; i<w_size; i++) 744 828 w[i] = 0; 745 829 for(i=0; i<l; i++) 746 830 { 747 alpha[i] = 0;748 if(prob>y[i] > 0)749 {750 y[i] = +1;751 }752 else753 {754 y[i] = 1;755 }756 831 QD[i] = diag[GETI(i)]; 757 832 … … 759 834 while (xi>index != 1) 760 835 { 761 QD[i] += (xi>value)*(xi>value); 836 double val = xi>value; 837 QD[i] += val*val; 838 w[xi>index1] += y[i]*alpha[i]*val; 762 839 xi++; 763 840 } … … 888 965 } 889 966 967 968 // A coordinate descent algorithm for 969 // L1loss and L2loss epsilonSVR dual problem 970 // 971 // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta  p \sum_{i=1}^l\beta_i + \sum_{i=1}^l yi\beta_i, 972 // s.t. upper_bound_i <= \beta_i <= upper_bound_i, 973 // 974 // where Qij = xi^T xj and 975 // D is a diagonal matrix 976 // 977 // In L1SVM case: 978 // upper_bound_i = C 979 // lambda_i = 0 980 // In L2SVM case: 981 // upper_bound_i = INF 982 // lambda_i = 1/(2*C) 983 // 984 // Given: 985 // x, y, p, C 986 // eps is the stopping tolerance 987 // 988 // solution will be put in w 989 // 990 // See Algorithm 4 of Ho and Lin, 2012 991 992 #undef GETI 993 #define GETI(i) (0) 994 // To support weights for instances, use GETI(i) (i) 995 996 static void solve_l2r_l1l2_svr( 997 const problem *prob, double *w, const parameter *param, 998 int solver_type) 999 { 1000 int l = prob>l; 1001 double C = param>C; 1002 double p = param>p; 1003 int w_size = prob>n; 1004 double eps = param>eps; 1005 int i, s, iter = 0; 1006 int max_iter = 1000; 1007 int active_size = l; 1008 int *index = new int[l]; 1009 1010 double d, G, H; 1011 double Gmax_old = INF; 1012 double Gmax_new, Gnorm1_new; 1013 double Gnorm1_init; 1014 double *beta = new double[l]; 1015 double *QD = new double[l]; 1016 double *y = prob>y; 1017 1018 // L2R_L2LOSS_SVR_DUAL 1019 double lambda[1], upper_bound[1]; 1020 lambda[0] = 0.5/C; 1021 upper_bound[0] = INF; 1022 1023 if(solver_type == L2R_L1LOSS_SVR_DUAL) 1024 { 1025 lambda[0] = 0; 1026 upper_bound[0] = C; 1027 } 1028 1029 // Initial beta can be set here. Note that 1030 // upper_bound <= beta[i] <= upper_bound 1031 for(i=0; i<l; i++) 1032 beta[i] = 0; 1033 1034 for(i=0; i<w_size; i++) 1035 w[i] = 0; 1036 for(i=0; i<l; i++) 1037 { 1038 QD[i] = 0; 1039 feature_node *xi = prob>x[i]; 1040 while(xi>index != 1) 1041 { 1042 double val = xi>value; 1043 QD[i] += val*val; 1044 w[xi>index1] += beta[i]*val; 1045 xi++; 1046 } 1047 1048 index[i] = i; 1049 } 1050 1051 1052 while(iter < max_iter) 1053 { 1054 Gmax_new = 0; 1055 Gnorm1_new = 0; 1056 1057 for(i=0; i<active_size; i++) 1058 { 1059 int j = i+rand()%(active_sizei); 1060 swap(index[i], index[j]); 1061 } 1062 1063 for(s=0; s<active_size; s++) 1064 { 1065 i = index[s]; 1066 G = y[i] + lambda[GETI(i)]*beta[i]; 1067 H = QD[i] + lambda[GETI(i)]; 1068 1069 feature_node *xi = prob>x[i]; 1070 while(xi>index != 1) 1071 { 1072 int ind = xi>index1; 1073 double val = xi>value; 1074 G += val*w[ind]; 1075 xi++; 1076 } 1077 1078 double Gp = G+p; 1079 double Gn = Gp; 1080 double violation = 0; 1081 if(beta[i] == 0) 1082 { 1083 if(Gp < 0) 1084 violation = Gp; 1085 else if(Gn > 0) 1086 violation = Gn; 1087 else if(Gp>Gmax_old && Gn<Gmax_old) 1088 { 1089 active_size; 1090 swap(index[s], index[active_size]); 1091 s; 1092 continue; 1093 } 1094 } 1095 else if(beta[i] >= upper_bound[GETI(i)]) 1096 { 1097 if(Gp > 0) 1098 violation = Gp; 1099 else if(Gp < Gmax_old) 1100 { 1101 active_size; 1102 swap(index[s], index[active_size]); 1103 s; 1104 continue; 1105 } 1106 } 1107 else if(beta[i] <= upper_bound[GETI(i)]) 1108 { 1109 if(Gn < 0) 1110 violation = Gn; 1111 else if(Gn > Gmax_old) 1112 { 1113 active_size; 1114 swap(index[s], index[active_size]); 1115 s; 1116 continue; 1117 } 1118 } 1119 else if(beta[i] > 0) 1120 violation = fabs(Gp); 1121 else 1122 violation = fabs(Gn); 1123 1124 Gmax_new = max(Gmax_new, violation); 1125 Gnorm1_new += violation; 1126 1127 // obtain Newton direction d 1128 if(Gp < H*beta[i]) 1129 d = Gp/H; 1130 else if(Gn > H*beta[i]) 1131 d = Gn/H; 1132 else 1133 d = beta[i]; 1134 1135 if(fabs(d) < 1.0e12) 1136 continue; 1137 1138 double beta_old = beta[i]; 1139 beta[i] = min(max(beta[i]+d, upper_bound[GETI(i)]), upper_bound[GETI(i)]); 1140 d = beta[i]beta_old; 1141 1142 if(d != 0) 1143 { 1144 xi = prob>x[i]; 1145 while(xi>index != 1) 1146 { 1147 w[xi>index1] += d*xi>value; 1148 xi++; 1149 } 1150 } 1151 } 1152 1153 if(iter == 0) 1154 Gnorm1_init = Gnorm1_new; 1155 iter++; 1156 if(iter % 10 == 0) 1157 info("."); 1158 1159 if(Gnorm1_new <= eps*Gnorm1_init) 1160 { 1161 if(active_size == l) 1162 break; 1163 else 1164 { 1165 active_size = l; 1166 info("*"); 1167 Gmax_old = INF; 1168 continue; 1169 } 1170 } 1171 1172 Gmax_old = Gmax_new; 1173 } 1174 1175 info("\noptimization finished, #iter = %d\n", iter); 1176 if(iter >= max_iter) 1177 info("\nWARNING: reaching max number of iterations\nUsing s 11 may be faster\n\n"); 1178 1179 // calculate objective value 1180 double v = 0; 1181 int nSV = 0; 1182 for(i=0; i<w_size; i++) 1183 v += w[i]*w[i]; 1184 v = 0.5*v; 1185 for(i=0; i<l; i++) 1186 { 1187 v += p*fabs(beta[i])  y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i]; 1188 if(beta[i] != 0) 1189 nSV++; 1190 } 1191 1192 info("Objective value = %lf\n", v); 1193 info("nSV = %d\n",nSV); 1194 1195 delete [] beta; 1196 delete [] QD; 1197 delete [] index; 1198 } 1199 1200 890 1201 // A coordinate descent algorithm for 891 1202 // the dual of L2regularized logistic regression problems 892 1203 // 893 // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i  alpha_i) log (upper_bound_i  alpha_i),894 // s.t. 0 <= alpha_i <= upper_bound_i,1204 // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i  \alpha_i) log (upper_bound_i  \alpha_i), 1205 // s.t. 0 <= \alpha_i <= upper_bound_i, 895 1206 // 896 1207 // where Qij = yi yj xi^T xj and … … 917 1228 double *xTx = new double[l]; 918 1229 int max_iter = 1000; 919 int *index = new int[l]; 1230 int *index = new int[l]; 920 1231 double *alpha = new double[2*l]; // store alpha and C  alpha 921 schar *y = new schar[l]; 1232 schar *y = new schar[l]; 922 1233 int max_inner_iter = 100; // for inner Newton 923 double innereps = 1e2; 1234 double innereps = 1e2; 924 1235 double innereps_min = min(1e8, eps); 925 1236 double upper_bound[3] = {Cn, 0, Cp}; 1237 1238 for(i=0; i<l; i++) 1239 { 1240 if(prob>y[i] > 0) 1241 { 1242 y[i] = +1; 1243 } 1244 else 1245 { 1246 y[i] = 1; 1247 } 1248 } 1249 1250 // Initial alpha can be set here. Note that 1251 // 0 < alpha[i] < upper_bound[GETI(i)] 1252 // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)] 1253 for(i=0; i<l; i++) 1254 { 1255 alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e8); 1256 alpha[2*i+1] = upper_bound[GETI(i)]  alpha[2*i]; 1257 } 926 1258 927 1259 for(i=0; i<w_size; i++) … … 929 1261 for(i=0; i<l; i++) 930 1262 { 931 if(prob>y[i] > 0)932 {933 y[i] = +1;934 }935 else936 {937 y[i] = 1;938 }939 alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e8);940 alpha[2*i+1] = upper_bound[GETI(i)]  alpha[2*i];941 942 1263 xTx[i] = 0; 943 1264 feature_node *xi = prob>x[i]; 944 1265 while (xi>index != 1) 945 1266 { 946 xTx[i] += (xi>value)*(xi>value); 947 w[xi>index1] += y[i]*alpha[2*i]*xi>value; 1267 double val = xi>value; 1268 xTx[i] += val*val; 1269 w[xi>index1] += y[i]*alpha[2*i]*val; 948 1270 xi++; 949 1271 } … … 977 1299 // Decide to minimize g_1(z) or g_2(z) 978 1300 int ind1 = 2*i, ind2 = 2*i+1, sign = 1; 979 if(0.5*a*(alpha[ind2]alpha[ind1])+b < 0) 1301 if(0.5*a*(alpha[ind2]alpha[ind1])+b < 0) 980 1302 { 981 1303 ind1 = 2*i+1; … … 987 1309 double alpha_old = alpha[ind1]; 988 1310 double z = alpha_old; 989 if(C  z < 0.5 * C) 1311 if(C  z < 0.5 * C) 990 1312 z = 0.1*z; 991 1313 double gp = a*(zalpha_old)+sign*b+log(z/(Cz)); … … 995 1317 const double eta = 0.1; // xi in the paper 996 1318 int inner_iter = 0; 997 while (inner_iter <= max_inner_iter) 1319 while (inner_iter <= max_inner_iter) 998 1320 { 999 1321 if(fabs(gp) < innereps) … … 1001 1323 double gpp = a + C/(Cz)/z; 1002 1324 double tmpz = z  gp/gpp; 1003 if(tmpz <= 0) 1325 if(tmpz <= 0) 1004 1326 z *= eta; 1005 1327 else // tmpz in (0, C) … … 1019 1341 w[xi>index1] += sign*(zalpha_old)*yi*xi>value; 1020 1342 xi++; 1021 } 1343 } 1022 1344 } 1023 1345 } … … 1027 1349 info("."); 1028 1350 1029 if(Gmax < eps) 1351 if(Gmax < eps) 1030 1352 break; 1031 1353 1032 if(newton_iter <= l/10) 1354 if(newton_iter <= l/10) 1033 1355 innereps = max(innereps_min, 0.1*innereps); 1034 1356 … … 1040 1362 1041 1363 // calculate objective value 1042 1364 1043 1365 double v = 0; 1044 1366 for(i=0; i<w_size; i++) … … 1046 1368 v *= 0.5; 1047 1369 for(i=0; i<l; i++) 1048 v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1]) 1370 v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1]) 1049 1371  upper_bound[GETI(i)] * log(upper_bound[GETI(i)]); 1050 1372 info("Objective value = %lf\n", v); … … 1074 1396 1075 1397 static void solve_l1r_l2_svc( 1076 problem *prob_col, double *w, double eps, 1398 problem *prob_col, double *w, double eps, 1077 1399 double Cp, double Cn) 1078 1400 { … … 1101 1423 double C[3] = {Cn,0,Cp}; 1102 1424 1425 // Initial w can be set here. 1426 for(j=0; j<w_size; j++) 1427 w[j] = 0; 1428 1103 1429 for(j=0; j<l; j++) 1104 1430 { … … 1111 1437 for(j=0; j<w_size; j++) 1112 1438 { 1113 w[j] = 0;1114 1439 index[j] = j; 1115 1440 xj_sq[j] = 0; … … 1118 1443 { 1119 1444 int ind = x>index1; 1445 x>value *= y[ind]; // x>value stores yi*xij 1120 1446 double val = x>value; 1121 x>value *= y[ind]; // x>value stores yi*xij1447 b[ind] = w[j]*val; 1122 1448 xj_sq[j] += C[GETI(ind)]*val*val; 1123 1449 x++; … … 1187 1513 1188 1514 // obtain Newton direction d 1189 if(Gp < =H*w[j])1515 if(Gp < H*w[j]) 1190 1516 d = Gp/H; 1191 else if(Gn > =H*w[j])1517 else if(Gn > H*w[j]) 1192 1518 d = Gn/H; 1193 1519 else … … 1357 1683 1358 1684 static void solve_l1r_lr( 1359 const problem *prob_col, double *w, double eps, 1685 const problem *prob_col, double *w, double eps, 1360 1686 double Cp, double Cn) 1361 1687 { … … 1372 1698 double inner_eps = 1; 1373 1699 double sigma = 0.01; 1374 double w_norm =0, w_norm_new;1700 double w_norm, w_norm_new; 1375 1701 double z, G, H; 1376 1702 double Gnorm1_init; … … 1396 1722 double C[3] = {Cn,0,Cp}; 1397 1723 1724 // Initial w can be set here. 1725 for(j=0; j<w_size; j++) 1726 w[j] = 0; 1727 1398 1728 for(j=0; j<l; j++) 1399 1729 { … … 1403 1733 y[j] = 1; 1404 1734 1405 // assume initial w is 0 1406 exp_wTx[j] = 1; 1407 tau[j] = C[GETI(j)]*0.5; 1408 D[j] = C[GETI(j)]*0.25; 1409 } 1735 exp_wTx[j] = 0; 1736 } 1737 1738 w_norm = 0; 1410 1739 for(j=0; j<w_size; j++) 1411 1740 { 1412 w [j] = 0;1741 w_norm += fabs(w[j]); 1413 1742 wpd[j] = w[j]; 1414 1743 index[j] = j; … … 1418 1747 { 1419 1748 int ind = x>index1; 1749 double val = x>value; 1750 exp_wTx[ind] += w[j]*val; 1420 1751 if(y[ind] == 1) 1421 xjneg_sum[j] += C[GETI(ind)]* x>value;1752 xjneg_sum[j] += C[GETI(ind)]*val; 1422 1753 x++; 1423 1754 } 1755 } 1756 for(j=0; j<l; j++) 1757 { 1758 exp_wTx[j] = exp(exp_wTx[j]); 1759 double tau_tmp = 1/(1+exp_wTx[j]); 1760 tau[j] = C[GETI(j)]*tau_tmp; 1761 D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp; 1424 1762 } 1425 1763 … … 1540 1878 1541 1879 // obtain solution of onevariable problem 1542 if(Gp < =H*wpd[j])1880 if(Gp < H*wpd[j]) 1543 1881 z = Gp/H; 1544 else if(Gn > =H*wpd[j])1882 else if(Gn > H*wpd[j]) 1545 1883 z = Gn/H; 1546 1884 else … … 1677 2015 1678 2016 // calculate objective value 1679 2017 1680 2018 double v = 0; 1681 2019 int nnz = 0; … … 1719 2057 prob_col>l = l; 1720 2058 prob_col>n = n; 1721 prob_col>y = new int[l];2059 prob_col>y = new double[l]; 1722 2060 prob_col>x = new feature_node*[n]; 1723 2061 … … 1778 2116 for(i=0;i<l;i++) 1779 2117 { 1780 int this_label = prob>y[i];2118 int this_label = (int)prob>y[i]; 1781 2119 int j; 1782 2120 for(j=0;j<nr_class;j++) … … 1829 2167 int neg = 0; 1830 2168 for(int i=0;i<prob>l;i++) 1831 if(prob>y[i] ==+1)2169 if(prob>y[i] > 0) 1832 2170 pos++; 1833 2171 neg = prob>l  pos; 1834 2172 2173 double primal_solver_tol = eps*max(min(pos,neg), 1)/prob>l; 2174 1835 2175 function *fun_obj=NULL; 1836 2176 switch(param>solver_type) … … 1838 2178 case L2R_LR: 1839 2179 { 1840 fun_obj=new l2r_lr_fun(prob, Cp, Cn); 1841 TRON tron_obj(fun_obj, eps*min(pos,neg)/prob>l); 2180 double *C = new double[prob>l]; 2181 for(int i = 0; i < prob>l; i++) 2182 { 2183 if(prob>y[i] > 0) 2184 C[i] = Cp; 2185 else 2186 C[i] = Cn; 2187 } 2188 fun_obj=new l2r_lr_fun(prob, C); 2189 TRON tron_obj(fun_obj, primal_solver_tol); 1842 2190 tron_obj.set_print_string(liblinear_print_string); 1843 2191 tron_obj.tron(w); 1844 2192 delete fun_obj; 2193 delete C; 1845 2194 break; 1846 2195 } 1847 2196 case L2R_L2LOSS_SVC: 1848 2197 { 1849 fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn); 1850 TRON tron_obj(fun_obj, eps*min(pos,neg)/prob>l); 2198 double *C = new double[prob>l]; 2199 for(int i = 0; i < prob>l; i++) 2200 { 2201 if(prob>y[i] > 0) 2202 C[i] = Cp; 2203 else 2204 C[i] = Cn; 2205 } 2206 fun_obj=new l2r_l2_svc_fun(prob, C); 2207 TRON tron_obj(fun_obj, primal_solver_tol); 1851 2208 tron_obj.set_print_string(liblinear_print_string); 1852 2209 tron_obj.tron(w); 1853 2210 delete fun_obj; 2211 delete C; 1854 2212 break; 1855 2213 } … … 1865 2223 feature_node *x_space = NULL; 1866 2224 transpose(prob, &x_space ,&prob_col); 1867 solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob>l, Cp, Cn);2225 solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn); 1868 2226 delete [] prob_col.y; 1869 2227 delete [] prob_col.x; … … 1876 2234 feature_node *x_space = NULL; 1877 2235 transpose(prob, &x_space ,&prob_col); 1878 solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob>l, Cp, Cn);2236 solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn); 1879 2237 delete [] prob_col.y; 1880 2238 delete [] prob_col.x; … … 1885 2243 solve_l2r_lr_dual(prob, w, eps, Cp, Cn); 1886 2244 break; 2245 case L2R_L2LOSS_SVR: 2246 { 2247 double *C = new double[prob>l]; 2248 for(int i = 0; i < prob>l; i++) 2249 C[i] = param>C; 2250 2251 fun_obj=new l2r_l2_svr_fun(prob, C, param>p); 2252 TRON tron_obj(fun_obj, param>eps); 2253 tron_obj.set_print_string(liblinear_print_string); 2254 tron_obj.tron(w); 2255 delete fun_obj; 2256 delete C; 2257 break; 2258 2259 } 2260 case L2R_L1LOSS_SVR_DUAL: 2261 solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL); 2262 break; 2263 case L2R_L2LOSS_SVR_DUAL: 2264 solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL); 2265 break; 1887 2266 default: 1888 fprintf(stderr, "E rror: unknown solver_type\n");2267 fprintf(stderr, "ERROR: unknown solver_type\n"); 1889 2268 break; 1890 2269 } … … 1909 2288 model_>bias = prob>bias; 1910 2289 1911 int nr_class; 1912 int *label = NULL; 1913 int *start = NULL; 1914 int *count = NULL; 1915 int *perm = Malloc(int,l); 1916 1917 // group training data of the same class 1918 group_classes(prob,&nr_class,&label,&start,&count,perm); 1919 1920 model_>nr_class=nr_class; 1921 model_>label = Malloc(int,nr_class); 1922 for(i=0;i<nr_class;i++) 1923 model_>label[i] = label[i]; 1924 1925 // calculate weighted C 1926 double *weighted_C = Malloc(double, nr_class); 1927 for(i=0;i<nr_class;i++) 1928 weighted_C[i] = param>C; 1929 for(i=0;i<param>nr_weight;i++) 1930 { 1931 for(j=0;j<nr_class;j++) 1932 if(param>weight_label[i] == label[j]) 1933 break; 1934 if(j == nr_class) 1935 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param>weight_label[i]); 2290 if(param>solver_type == L2R_L2LOSS_SVR  2291 param>solver_type == L2R_L1LOSS_SVR_DUAL  2292 param>solver_type == L2R_L2LOSS_SVR_DUAL) 2293 { 2294 model_>w = Malloc(double, w_size); 2295 model_>nr_class = 2; 2296 model_>label = NULL; 2297 train_one(prob, param, &model_>w[0], 0, 0); 2298 } 2299 else 2300 { 2301 int nr_class; 2302 int *label = NULL; 2303 int *start = NULL; 2304 int *count = NULL; 2305 int *perm = Malloc(int,l); 2306 2307 // group training data of the same class 2308 group_classes(prob,&nr_class,&label,&start,&count,perm); 2309 2310 model_>nr_class=nr_class; 2311 model_>label = Malloc(int,nr_class); 2312 for(i=0;i<nr_class;i++) 2313 model_>label[i] = label[i]; 2314 2315 // calculate weighted C 2316 double *weighted_C = Malloc(double, nr_class); 2317 for(i=0;i<nr_class;i++) 2318 weighted_C[i] = param>C; 2319 for(i=0;i<param>nr_weight;i++) 2320 { 2321 for(j=0;j<nr_class;j++) 2322 if(param>weight_label[i] == label[j]) 2323 break; 2324 if(j == nr_class) 2325 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param>weight_label[i]); 2326 else 2327 weighted_C[j] *= param>weight[i]; 2328 } 2329 2330 // constructing the subproblem 2331 feature_node **x = Malloc(feature_node *,l); 2332 for(i=0;i<l;i++) 2333 x[i] = prob>x[perm[i]]; 2334 2335 int k; 2336 problem sub_prob; 2337 sub_prob.l = l; 2338 sub_prob.n = n; 2339 sub_prob.x = Malloc(feature_node *,sub_prob.l); 2340 sub_prob.y = Malloc(double,sub_prob.l); 2341 2342 for(k=0; k<sub_prob.l; k++) 2343 sub_prob.x[k] = x[k]; 2344 2345 // multiclass svm by Crammer and Singer 2346 if(param>solver_type == MCSVM_CS) 2347 { 2348 model_>w=Malloc(double, n*nr_class); 2349 for(i=0;i<nr_class;i++) 2350 for(j=start[i];j<start[i]+count[i];j++) 2351 sub_prob.y[j] = i; 2352 Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param>eps); 2353 Solver.Solve(model_>w); 2354 } 1936 2355 else 1937 weighted_C[j] *= param>weight[i]; 1938 } 1939 1940 // constructing the subproblem 1941 feature_node **x = Malloc(feature_node *,l); 1942 for(i=0;i<l;i++) 1943 x[i] = prob>x[perm[i]]; 1944 1945 int k; 1946 problem sub_prob; 1947 sub_prob.l = l; 1948 sub_prob.n = n; 1949 sub_prob.x = Malloc(feature_node *,sub_prob.l); 1950 sub_prob.y = Malloc(int,sub_prob.l); 1951 1952 for(k=0; k<sub_prob.l; k++) 1953 sub_prob.x[k] = x[k]; 1954 1955 // multiclass svm by Crammer and Singer 1956 if(param>solver_type == MCSVM_CS) 1957 { 1958 model_>w=Malloc(double, n*nr_class); 1959 for(i=0;i<nr_class;i++) 1960 for(j=start[i];j<start[i]+count[i];j++) 1961 sub_prob.y[j] = i; 1962 Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param>eps); 1963 Solver.Solve(model_>w); 1964 } 1965 else 1966 { 1967 if(nr_class == 2) 1968 { 1969 model_>w=Malloc(double, w_size); 1970 1971 int e0 = start[0]+count[0]; 1972 k=0; 1973 for(; k<e0; k++) 1974 sub_prob.y[k] = +1; 1975 for(; k<sub_prob.l; k++) 1976 sub_prob.y[k] = 1; 1977 1978 train_one(&sub_prob, param, &model_>w[0], weighted_C[0], weighted_C[1]); 1979 } 1980 else 1981 { 1982 model_>w=Malloc(double, w_size*nr_class); 1983 double *w=Malloc(double, w_size); 1984 for(i=0;i<nr_class;i++) 1985 { 1986 int si = start[i]; 1987 int ei = si+count[i]; 1988 2356 { 2357 if(nr_class == 2) 2358 { 2359 model_>w=Malloc(double, w_size); 2360 2361 int e0 = start[0]+count[0]; 1989 2362 k=0; 1990 for(; k<si; k++) 1991 sub_prob.y[k] = 1; 1992 for(; k<ei; k++) 2363 for(; k<e0; k++) 1993 2364 sub_prob.y[k] = +1; 1994 2365 for(; k<sub_prob.l; k++) 1995 2366 sub_prob.y[k] = 1; 1996 2367 1997 train_one(&sub_prob, param, w, weighted_C[i], param>C); 1998 1999 for(int j=0;j<w_size;j++) 2000 model_>w[j*nr_class+i] = w[j]; 2001 } 2002 free(w); 2003 } 2004 2005 } 2006 2007 free(x); 2008 free(label); 2009 free(start); 2010 free(count); 2011 free(perm); 2012 free(sub_prob.x); 2013 free(sub_prob.y); 2014 free(weighted_C); 2368 train_one(&sub_prob, param, &model_>w[0], weighted_C[0], weighted_C[1]); 2369 } 2370 else 2371 { 2372 model_>w=Malloc(double, w_size*nr_class); 2373 double *w=Malloc(double, w_size); 2374 for(i=0;i<nr_class;i++) 2375 { 2376 int si = start[i]; 2377 int ei = si+count[i]; 2378 2379 k=0; 2380 for(; k<si; k++) 2381 sub_prob.y[k] = 1; 2382 for(; k<ei; k++) 2383 sub_prob.y[k] = +1; 2384 for(; k<sub_prob.l; k++) 2385 sub_prob.y[k] = 1; 2386 2387 train_one(&sub_prob, param, w, weighted_C[i], param>C); 2388 2389 for(int j=0;j<w_size;j++) 2390 model_>w[j*nr_class+i] = w[j]; 2391 } 2392 free(w); 2393 } 2394 2395 } 2396 2397 free(x); 2398 free(label); 2399 free(start); 2400 free(count); 2401 free(perm); 2402 free(sub_prob.x); 2403 free(sub_prob.y); 2404 free(weighted_C); 2405 } 2015 2406 return model_; 2016 2407 } 2017 2408 2018 void cross_validation(const problem *prob, const parameter *param, int nr_fold, int*target)2409 void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target) 2019 2410 { 2020 2411 int i; … … 2043 2434 subprob.l = l(endbegin); 2044 2435 subprob.x = Malloc(struct feature_node*,subprob.l); 2045 subprob.y = Malloc( int,subprob.l);2436 subprob.y = Malloc(double,subprob.l); 2046 2437 2047 2438 k=0; … … 2069 2460 } 2070 2461 2071 intpredict_values(const struct model *model_, const struct feature_node *x, double *dec_values)2462 double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) 2072 2463 { 2073 2464 int idx; … … 2098 2489 2099 2490 if(nr_class==2) 2100 return (dec_values[0]>0)?model_>label[0]:model_>label[1]; 2491 { 2492 if(model_>param.solver_type == L2R_L2LOSS_SVR  2493 model_>param.solver_type == L2R_L1LOSS_SVR_DUAL  2494 model_>param.solver_type == L2R_L2LOSS_SVR_DUAL) 2495 return dec_values[0]; 2496 else 2497 return (dec_values[0]>0)?model_>label[0]:model_>label[1]; 2498 } 2101 2499 else 2102 2500 { … … 2111 2509 } 2112 2510 2113 intpredict(const model *model_, const feature_node *x)2511 double predict(const model *model_, const feature_node *x) 2114 2512 { 2115 2513 double *dec_values = Malloc(double, model_>nr_class); 2116 intlabel=predict_values(model_, x, dec_values);2514 double label=predict_values(model_, x, dec_values); 2117 2515 free(dec_values); 2118 2516 return label; 2119 2517 } 2120 2518 2121 intpredict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)2519 double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) 2122 2520 { 2123 2521 if(check_probability_model(model_)) … … 2131 2529 nr_w = nr_class; 2132 2530 2133 intlabel=predict_values(model_, x, prob_estimates);2531 double label=predict_values(model_, x, prob_estimates); 2134 2532 for(i=0;i<nr_w;i++) 2135 2533 prob_estimates[i]=1/(1+exp(prob_estimates[i])); … … 2147 2545 } 2148 2546 2149 return label; 2547 return label; 2150 2548 } 2151 2549 else … … 2156 2554 { 2157 2555 "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS", 2158 "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL 2556 "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", 2557 "", "", "", 2558 "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL 2159 2559 }; 2160 2560 … … 2174 2574 if(fp==NULL) return 1; 2175 2575 2576 char *old_locale = strdup(setlocale(LC_ALL, NULL)); 2577 setlocale(LC_ALL, "C"); 2578 2176 2579 int nr_w; 2177 2580 if(model_>nr_class==2 && model_>param.solver_type != MCSVM_CS) … … 2182 2585 fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]); 2183 2586 fprintf(fp, "nr_class %d\n", model_>nr_class); 2184 fprintf(fp, "label"); 2185 for(i=0; i<model_>nr_class; i++) 2186 fprintf(fp, " %d", model_>label[i]); 2187 fprintf(fp, "\n"); 2587 2588 if(model_>label) 2589 { 2590 fprintf(fp, "label"); 2591 for(i=0; i<model_>nr_class; i++) 2592 fprintf(fp, " %d", model_>label[i]); 2593 fprintf(fp, "\n"); 2594 } 2188 2595 2189 2596 fprintf(fp, "nr_feature %d\n", nr_feature); … … 2199 2606 fprintf(fp, "\n"); 2200 2607 } 2608 2609 setlocale(LC_ALL, old_locale); 2610 free(old_locale); 2201 2611 2202 2612 if (ferror(fp) != 0  fclose(fp) != 0) return 1; … … 2219 2629 model_>label = NULL; 2220 2630 2631 char *old_locale = strdup(setlocale(LC_ALL, NULL)); 2632 setlocale(LC_ALL, "C"); 2633 2221 2634 char cmd[81]; 2222 2635 while(1) … … 2238 2651 { 2239 2652 fprintf(stderr,"unknown solver type.\n"); 2653 2654 setlocale(LC_ALL, old_locale); 2240 2655 free(model_>label); 2241 2656 free(model_); 2657 free(old_locale); 2242 2658 return NULL; 2243 2659 } … … 2272 2688 { 2273 2689 fprintf(stderr,"unknown text in model file: [%s]\n",cmd); 2690 setlocale(LC_ALL, old_locale); 2691 free(model_>label); 2274 2692 free(model_); 2693 free(old_locale); 2275 2694 return NULL; 2276 2695 } … … 2297 2716 fscanf(fp, "\n"); 2298 2717 } 2718 2719 setlocale(LC_ALL, old_locale); 2720 free(old_locale); 2721 2299 2722 if (ferror(fp) != 0  fclose(fp) != 0) return NULL; 2300 2723 … … 2352 2775 if(param>C <= 0) 2353 2776 return "C <= 0"; 2777 2778 if(param>p < 0) 2779 return "p < 0"; 2354 2780 2355 2781 if(param>solver_type != L2R_LR … … 2360 2786 && param>solver_type != L1R_L2LOSS_SVC 2361 2787 && param>solver_type != L1R_LR 2362 && param>solver_type != L2R_LR_DUAL) 2788 && param>solver_type != L2R_LR_DUAL 2789 && param>solver_type != L2R_L2LOSS_SVR 2790 && param>solver_type != L2R_L2LOSS_SVR_DUAL 2791 && param>solver_type != L2R_L1LOSS_SVR_DUAL) 2363 2792 return "unknown solver type"; 2364 2793 … … 2375 2804 void set_print_string_function(void (*print_func)(const char*)) 2376 2805 { 2377 if (print_func == NULL) 2806 if (print_func == NULL) 2378 2807 liblinear_print_string = &print_string_stdout; 2379 2808 else
Note: See TracChangeset
for help on using the changeset viewer.