/* shogun structured-output solver: non-convex bundle method (svm_ncbm_solver)
 * and its strong-Wolfe line search. */
#include <shogun/lib/external/libqp.h>

/* get_col(): column accessor for the cutting-plane Gram matrix handed to the
 * libqp solver; the buffer is stored column-wise with leading dimension maxCPs. */
return (&HMatrix[maxCPs*i]);
/* zoom(): cubic interpolation of the trial step a_j inside the bracket [a_lo, a_hi]. */
float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
float64_t a_j = a_hi - (a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);

/* Safeguard (a_lo < a_hi): bisect if the interpolated step leaves the bracket. */
if ((a_j < a_lo) || (a_j > a_hi))
	a_j = 0.5*(a_lo+a_hi);

/* Safeguard for the reversed bracket (a_lo > a_hi). */
if ((a_j > a_lo) || (a_j < a_hi))
	a_j = 0.5*(a_lo+a_hi);
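/* A minimal standalone sketch (not from this file) of the safeguarded cubic
 * interpolation used above, assuming d2 = sqrt(d1*d1 - g_lo*g_hi) on the line
 * omitted between d1 and a_j (Nocedal & Wright-style zoom step).  All names
 * below are hypothetical. */
#include <algorithm>
#include <cmath>

static double cubic_trial_step(double a_lo, double f_lo, double g_lo,
                               double a_hi, double f_hi, double g_hi)
{
	const double d1 = g_lo + g_hi - 3.0*(f_lo - f_hi)/(a_lo - a_hi);
	const double d2 = std::sqrt(d1*d1 - g_lo*g_hi);
	double a_j = a_hi - (a_hi - a_lo)*(g_hi + d2 - d1)/(g_hi - g_lo + 2.0*d2);

	/* Bisect whenever the interpolated step leaves the bracket. */
	if (a_j < std::min(a_lo, a_hi) || a_j > std::max(a_lo, a_hi))
		a_j = 0.5*(a_lo + a_hi);
	return a_j;
}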
/* Trial point: cur_solution = initial_solution + a_j*search_dir (the call
 * continues with the search-direction vector and length, not shown here). */
cur_solution.add(cur_solution.vector, 1.0,
		initial_solution.vector, a_j,

/* Regularisation term at the trial point, cur_reg = 0.5*lambda*||cur_solution||^2
 * (the dot() call continues with the second vector and length, not shown here). */
	= 0.5*lambda*cur_solution.dot(cur_solution.vector,
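/* Sketch of the quantity assembled above: the line search evaluates the
 * regularised objective F(w) = R(w) + 0.5*lambda*||w||^2 at the trial point,
 * where R(w) is the structured risk and cur_reg is the 0.5*lambda*||w||^2 term.
 * Hypothetical helper, not the file's API: */
static double regularized_objective(double risk_at_w, const double *w,
                                    int len, double lambda)
{
	double sq_norm = 0.0;
	for (int k = 0; k < len; ++k)
		sq_norm += w[k]*w[k];           /* ||w||^2 */
	return risk_at_w + 0.5*lambda*sq_norm;
}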
/* Sufficient-decrease test: the trial violates the Armijo condition, so it
 * becomes the new upper end of the bracket. */
(cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))

/* Strong curvature condition satisfied: accept the trial step. */
if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)

ls_res.fval = cur_fval;
ls_res.reg = cur_reg;

/* Keep the bracket valid: if the derivative points the wrong way, swap its ends. */
if (cur_lgrad*(a_hi - a_lo) >= 0)

/* zoom() also returns the current trial point once the bracket has shrunk
 * enough or its iteration cap is reached. */
ls_res.fval = cur_fval;
ls_res.reg = cur_reg;
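/* The two strong Wolfe tests used above, written out for the scalar function
 * phi(a) = F(w0 + a*d), with phi(0) = f0 and slope phi'(0) = g0 < 0 for a
 * descent direction (the defaults declared below are wolfe_c1 = 1E-4 and
 * wolfe_c2 = 0.9).  Hypothetical helpers, shown only to make the conditions
 * explicit: */
#include <cmath>

static bool sufficient_decrease(double f_a, double a,
                                double f0, double g0, double c1)
{
	return f_a <= f0 + c1*a*g0;          /* Armijo / sufficient-decrease test */
}

static bool strong_curvature(double g_a, double g0, double c2)
{
	return std::fabs(g_a) <= -c2*g0;     /* |phi'(a)| <= c2*|phi'(0)| */
}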
/* line_search_with_strong_wolfe(): result vector; the step evaluated at astart
 * is always recorded as the first candidate. */
std::vector<line_search_res> ret;

initial_step.fval = cur_fval;
initial_step.reg = cur_reg;
initial_step.gradient = cur_subgrad;

ret.push_back(initial_step);
/* Bracketing loop: a minimiser is bracketed in [prev_a, cur_a] when the Armijo
 * test fails, or when the value stops decreasing after the first iteration. */
(cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
	||
(cur_fval >= prev_fval && iter > 0)

zoom(machine, lambda, prev_a, cur_a, initial_val,
		initial_solution, search_dir, wolfe_c1, wolfe_c2,
		initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)

/* Strong curvature condition holds at cur_a: accept it. */
if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)

ls_res.fval = cur_fval;
ls_res.reg = cur_reg;

ret.push_back(ls_res);

/* Non-negative directional derivative: the bracket is [cur_a, prev_a]. */
zoom(machine, lambda, cur_a, prev_a, initial_val,
		initial_solution, search_dir, wolfe_c1, wolfe_c2,
		initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)

/* Step has reached amax (within 1%) or the iteration cap: stop here. */
if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))

ls_res.fval = cur_fval;
ls_res.reg = cur_reg;

ret.push_back(ls_res);

/* Otherwise remember this point and grow the step towards amax. */
prev_fval = cur_fval;
prev_lgrad = cur_lgrad;

cur_a = (cur_a + amax)*0.5;
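/* Sketch of the bracketing pattern the fragments above follow (hypothetical,
 * scalar form).  Where the real code calls zoom() to refine a bracket, this
 * sketch simply returns the bracket midpoint. */
#include <cmath>
#include <functional>

static double wolfe_bracket_sketch(const std::function<double(double)>& phi,
                                   const std::function<double(double)>& dphi,
                                   double c1, double c2,
                                   double a1, double amax, int max_iter)
{
	const double f0 = phi(0.0), g0 = dphi(0.0);
	double prev_a = 0.0, prev_f = f0, cur_a = a1;

	for (int iter = 0; iter < max_iter; ++iter)
	{
		const double f = phi(cur_a), g = dphi(cur_a);

		if (f > f0 + c1*cur_a*g0 || (iter > 0 && f >= prev_f))
			return 0.5*(prev_a + cur_a);   /* minimiser bracketed: real code zooms in [prev_a, cur_a] */

		if (std::fabs(g) <= -c2*g0)
			return cur_a;                  /* strong Wolfe point found */

		if (g >= 0.0)
			return 0.5*(cur_a + prev_a);   /* reversed bracket: real code zooms in [cur_a, prev_a] */

		prev_a = cur_a;
		prev_f = f;
		cur_a  = 0.5*(cur_a + amax);       /* grow the step towards amax */
	}
	return cur_a;
}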
/* update_H(): extend the Gram matrix with the newly added cutting plane. */
for (uint32_t i=0; i < ncbm.nCP; ++i)
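/* Sketch of what an update_H of this kind maintains: the Gram entries of the
 * newest cutting plane against every stored plane.  The 1/lambda scaling is
 * the usual BMRM convention; the exact layout (LIBBMRM_INDEX, diag_H) follows
 * the file.  Hypothetical helper: */
static void gram_row_for_new_plane(const double *a_new, const double *const *A,
                                   int n_old, int dim, double lambda,
                                   double *row_out)
{
	for (int i = 0; i < n_old; ++i)
	{
		double d = 0.0;
		for (int k = 0; k < dim; ++k)
			d += a_new[k]*A[i][k];      /* <a_new, a_i> */
		row_out[i] = d/lambda;
	}
}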
/* svm_ncbm_solver(): QP solver state and the model's parameter dimension. */
libqp_state_T qp_exitflag={0, 0, 0, 0};

int32_t w_dim = model->get_dim();
/* Part of the NULL check after allocating the inactive-cutting-plane buffers. */
|| icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
best_risk = cur_risk;

/* Doubly linked list of cutting planes. */
bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;

update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
		ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);
/* Dual objective value returned by the inner QP solver. */
ncbm.Fd = -qp_exitflag.QP;
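/* Sketch of the master problem behind qp_exitflag: with Gram entries
 * H[i][j] = <a_i, a_j>/lambda, a BMRM-style solver minimises
 * 0.5*beta'*H*beta - bias'*beta over the unit simplex, and the dual value
 * reported above is Fd = -qp_exitflag.QP.  The helper below only evaluates
 * that objective for a given beta (column-major H, leading dimension ld);
 * sign conventions for the stored bias follow the file. */
static double bundle_dual_value(const double *beta, const double *H,
                                const double *bias, int n, int ld)
{
	double quad = 0.0, lin = 0.0;
	for (int i = 0; i < n; ++i)
	{
		lin += bias[i]*beta[i];
		for (int j = 0; j < n; ++j)
			quad += beta[i]*H[i + ld*j]*beta[j];
	}
	return -(0.5*quad - lin);           /* Fd = -(QP objective value) */
}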
for (uint32_t i=0; i < ncbm.nCP; ++i)
/* Prune inactive cutting planes (the clean_icp() call continues with the
 * remaining buffers, omitted here). */
clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,

for (uint32_t i=0; i < ncbm.nCP; ++i)
/* Optional duality-gap check: score every stored cutting plane at the current w. */
bool calc_gap = false;

for (uint32_t i=0; i < ncbm.nCP; ++i)

cp_ptr = cp_ptr->next;
scores[i] = cur_w.dot(cur_w.vector, a_1, w_dim);
SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
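/* Sketch of the primal estimate a gap check of this kind compares with Fd:
 * the cutting-plane model lambda/2*||w||^2 + max_i(<a_i, w> + b_i), built from
 * the per-plane scores computed in the loop above.  The bias sign convention
 * follows the file; names below are hypothetical. */
static double cutting_plane_model(const double *w, const double *const *A,
                                  const double *b, int n_cp, int dim,
                                  double lambda)
{
	double best = -1e300;
	for (int i = 0; i < n_cp; ++i)
	{
		double s = 0.0;
		for (int k = 0; k < dim; ++k)
			s += w[k]*A[i][k];          /* <a_i, w>, i.e. scores[i] */
		s += b[i];
		if (s > best)
			best = s;
	}
	double sq_norm = 0.0;
	for (int k = 0; k < dim; ++k)
		sq_norm += w[k]*w[k];
	return 0.5*lambda*sq_norm + best;
}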
/* Stopping tests: absolute duality gap and the cutting-plane buffer budget. */
if ((best_Fp - ncbm.Fd) <= TolAbs)

if (ncbm.nCP >= maxCPs)

if (ncbm.nCP+3 >= maxCPs)
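/* Sketch of the stopping tests: BMRM-style solvers stop once the duality-gap
 * estimate best_Fp - Fd drops below TolAbs (as checked above) or below TolRel
 * relative to the primal value; the buffer checks guard against running out of
 * cutting-plane slots.  Hypothetical helper: */
#include <cmath>

static bool bundle_converged(double Fp, double Fd, double TolAbs, double TolRel)
{
	const double gap = Fp - Fd;
	return gap <= TolAbs || gap <= TolRel*std::fabs(Fp);
}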
SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
		/* first argument line omitted in this excerpt */
		(ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA,
		qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);
/* Candidates for the new best w, collected from the line searches below. */
std::vector<line_search_res> wbest_candidates;

update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

wbest_candidates.push_back(ls);

uint32_t cp_min_approx = 0;
if (cp_min_approx || (ncbm.nIter == 1))
/* Strong-Wolfe line search; both returned points may be added as cutting planes. */
std::vector<line_search_res> ls_res

if (ls_res[0].fval != ls_res[1].fval)

/* Strip the lambda*w term so only the risk subgradient is stored as a plane. */
ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);

/* Tail of the bias computation for ls_res[0], analogous to the one for ls_res[1] below. */
	- (ls_res[0].fval - ls_res[0].reg);

update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

wbest_candidates.push_back(ls_res[0]);

ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);

/* Register the plane under a free buffer slot (start of the add_cutting_plane() call omitted). */
	find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);

/* Plane offset: bias = <w, g> - risk(w), with risk(w) = fval - reg. */
	= ls_res[1].solution.dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
	- (ls_res[1].fval - ls_res[1].reg);

update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

wbest_candidates.push_back(ls_res[1]);
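/* Sketch of how a line-search point becomes a cutting plane above: after the
 * lambda*w part is removed from the gradient, the plane's offset is chosen so
 * that it reproduces the unregularised risk at that point,
 *     bias = <w, g> - (fval - reg),   hence   <g, w> - bias = risk(w).
 * Hypothetical helper: */
static double plane_offset(const double *w, const double *g, int dim,
                           double fval, double reg)
{
	double wg = 0.0;
	for (int k = 0; k < dim; ++k)
		wg += w[k]*g[k];                /* <w, g> */
	return wg - (fval - reg);
}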
if ((best_Fp <= ls_res[1].fval) && (astart != 1))

update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);

wbest_candidates.push_back(ls);

/* Step length accepted by the line search, rescaled by the search-direction norm. */
astar = ls_res[1].a * norm_dir;
SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)

SG_SPRINT("\t searching for the best Fp:\n")
for (size_t i = 0; i < wbest_candidates.size(); i++)

SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
/* Keep the candidate with the smallest primal value as the incumbent. */
if (wbest_candidates[i].fval < best_Fp)

best_Fp = wbest_candidates[i].fval;
best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
memcpy(best_w, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim);
memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim);
/* Index of the cutting plane added for this candidate. */
index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);

/* Plane value at the incumbent: score = <best_w, g_i> - bias[cp_idx]
 * (the opening of the dot() call sits on a line omitted from this excerpt). */
		wbest_candidates[i].gradient.vector, w_dim)
	+ (-1.0*bias[cp_idx]);

if (score > best_risk)

/* Admissible range [L, U] for a corrected bias; both expressions continue from
 * omitted lines. */
		wbest_candidates[i].gradient.vector, w_dim);

	= best_Fp - wbest_candidates[i].reg
		wbest_candidates[i].gradient.vector, w_dim);

SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)

SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)

/* If no admissible bias exists, the plane is disabled by zeroing its normal. */
wbest_candidates[i].gradient.zero();
/* Walk the cutting-plane list to the entry that belongs to this candidate. */
cp_ptr = CPList_tail;
for (size_t j = wbest_candidates.size()-1; i < j; --j)

cp_ptr = cp_ptr->prev;

cp_ptr = CPList_head;
for (uint32_t j = 0; j < ncbm.nCP-1; ++j)

cp_ptr = cp_ptr->next;

/* Recompute the plane's bias from the incumbent (both expressions continue
 * from omitted lines). */
		wbest_candidates[i].gradient.vector, w_dim);

	= best_Fp - wbest_candidates[i].reg
		wbest_candidates[i].gradient.vector, w_dim);
SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
Referenced symbols (briefs from the Doxygen source browser):

#define LIBBMRM_CALLOC(x, y)
static T twonorm(const T *x, int32_t len)   // || x ||_2
Class Time that implements a stopwatch based on either cpu time or wall clock time.
static const double * get_col(uint32_t j)
Class DualLibQPBMSOSVM that uses Bundle Methods for Regularized Risk Minimization algorithms for structured output (SO) problems.
float64_t * get_cutting_plane(bmrm_ll *ptr)
static const float64_t INFTY   // infinity
void update_H(BmrmStatistics &ncbm, bmrm_ll *head, bmrm_ll *tail, SGMatrix< float64_t > &H, SGVector< float64_t > &diag_H, float64_t lambda, uint32_t maxCP, int32_t w_dim)
virtual int32_t get_dim() const =0
uint32_t find_free_idx(bool *map, uint32_t size)
static line_search_res zoom(CDualLibQPBMSOSVM *machine, float64_t lambda, float64_t a_lo, float64_t a_hi, float64_t initial_fval, SGVector< float64_t > &initial_solution, SGVector< float64_t > &search_dir, float64_t wolfe_c1, float64_t wolfe_c2, float64_t init_lgrad, float64_t f_lo, float64_t g_lo, float64_t f_hi, float64_t g_hi)
SGVector< float64_t > solution
static const float64_t epsilon
float64_t cur_time_diff(bool verbose=false)
static const uint32_t QPSolverMaxIter
static float64_t * HMatrix
virtual float64_t risk(float64_t *subgrad, float64_t *W, TMultipleCPinfo *info=0, EStructRiskType rtype=N_SLACK_MARGIN_RESCALING)
static T max(T a, T b)   // return the maximum of two integers
#define LIBBMRM_MEMCPY(x, y, z)
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)   // x=x+alpha*y
std::vector< line_search_res > line_search_with_strong_wolfe(CDualLibQPBMSOSVM *machine, float64_t lambda, float64_t initial_val, SGVector< float64_t > &initial_solution, SGVector< float64_t > &initial_grad, SGVector< float64_t > &search_dir, float64_t astart, float64_t amax=1.1, float64_t wolfe_c1=1E-4, float64_t wolfe_c2=0.9, float64_t max_iter=5)
static float64_t dot(const bool *v1, const bool *v2, int32_t n)   // Compute dot product between v1 and v2 (blas optimized)
Class CStructuredModel that represents the application specific model and contains most of the application dependent logic.
#define LIBBMRM_INDEX(ROW, COL, NUM_ROWS)
All classes and functions are contained in the shogun namespace.
#define IGNORE_IN_CLASSLIST
void add_cutting_plane(bmrm_ll **tail, bool *map, float64_t *A, uint32_t free_idx, float64_t *cp_data, uint32_t dim)
static T min(T a, T b)   // return the minimum of two integers
BmrmStatistics svm_ncbm_solver(CDualLibQPBMSOSVM *machine, float64_t *w, float64_t TolRel, float64_t TolAbs, float64_t _lambda, uint32_t _BufSize, bool cleanICP, uint32_t cleanAfter, bool is_convex, bool line_search, bool verbose)
CStructuredModel * get_model() const
SGVector< float64_t > gradient
void resize_vector(int32_t n)
static float32_t sqrt(float32_t x)   // x^0.5
void clean_icp(ICP_stats *icp_stats, BmrmStatistics &bmrm, bmrm_ll **head, bmrm_ll **tail, float64_t *&Hmat, float64_t *&diag_H, float64_t *&beta, bool *&map, uint32_t cleanAfter, float64_t *&b, uint32_t *&I, uint32_t cp_models)
void set_const(T const_elem)
void add(const SGVector< T > x)
static T abs(T a)   // return the absolute value of a number