SHOGUN  3.2.1
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义 
SingleLaplacianInferenceMethodWithLBFGS.cpp
浏览该文件的文档.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  * Code adapted from Gaussian Process Machine Learning Toolbox
31  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
32  */
34 
35 #ifdef HAVE_EIGEN3
39 
40 namespace shogun
41 {
42 
45 {
46  init();
47 }
48 
50  CKernel* kern,
51  CFeatures* feat,
52  CMeanFunction* m,
53  CLabels* lab,
54  CLikelihoodModel* mod)
55  : CSingleLaplacianInferenceMethod(kern, feat, m, lab, mod)
56 {
57  init();
58 }
59 
61  bool enable_newton_if_fail)
62 {
63  m_enable_newton_if_fail = enable_newton_if_fail;
64 }
65 
67  int m,
68  int max_linesearch,
69  int linesearch,
70  int max_iterations,
72  int past,
74  float64_t min_step,
75  float64_t max_step,
76  float64_t ftol,
77  float64_t wolfe,
78  float64_t gtol,
79  float64_t xtol,
80  float64_t orthantwise_c,
81  int orthantwise_start,
82  int orthantwise_end)
83 {
84  m_m = m;
85  m_max_linesearch = max_linesearch;
86  m_linesearch = linesearch;
87  m_max_iterations = max_iterations;
88  m_delta = delta;
89  m_past = past;
90  m_epsilon = epsilon;
91  m_min_step = min_step;
92  m_max_step = max_step;
93  m_ftol = ftol;
94  m_wolfe = wolfe;
95  m_gtol = gtol;
96  m_xtol = xtol;
97  m_orthantwise_c = orthantwise_c;
98  m_orthantwise_start = orthantwise_start;
99  m_orthantwise_end = orthantwise_end;
100 }
101 
/* Common set-up routine; the constructor visible earlier in this file calls it
 * from its body.
 *
 * Steps performed by the visible code:
 *  - calls set_newton_method(false) (presumably this clears
 *    m_enable_newton_if_fail -- TODO confirm against the setter);
 *  - resets the m_mean_f pointer to NULL;
 *  - passes every L-BFGS setting (m_m, m_max_linesearch, m_linesearch,
 *    m_max_iterations, m_delta, m_past, m_epsilon, m_min_step, m_max_step,
 *    m_ftol, m_wolfe, m_gtol, m_xtol, m_orthantwise_c, m_orthantwise_start,
 *    m_orthantwise_end) and m_enable_newton_if_fail to SG_ADD together with a
 *    name and a description string.
 *
 * NOTE(review): in this listing the closing argument line of every SG_ADD
 * call is absent (embedded listing numbers 109, 112, 115, ... are skipped),
 * so the calls appear unterminated here.
 * NOTE(review): the description string for orthantwise_c is spelled
 * "Coeefficient"; it is runtime text and is left unchanged here.
 */
102 void CSingleLaplacianInferenceMethodWithLBFGS::init()
103 {
105  set_newton_method(false);
106  m_mean_f = NULL;
107  SG_ADD(&m_m, "m",
108  "The number of corrections to approximate the inverse hessian matrix",
110  SG_ADD(&m_max_linesearch, "max_linesearch",
111  "The maximum number of trials to do line search for each L-BFGS update",
113  SG_ADD(&m_linesearch, "linesearch",
114  "The line search algorithm",
116  SG_ADD(&m_max_iterations, "max_iterations",
117  "The maximum number of iterations for L-BFGS update",
119  SG_ADD(&m_delta, "delta",
120  "Delta for convergence test based on the change of function value",
122  SG_ADD(&m_past, "past",
123  "Distance for delta-based convergence test",
125  SG_ADD(&m_epsilon, "epsilon",
126  "Epsilon for convergence test based on the change of gradient",
128  SG_ADD(&m_min_step, "min_step",
129  "The minimum step of the line search",
131  SG_ADD(&m_max_step, "max_step",
132  "The maximum step of the line search",
134  SG_ADD(&m_ftol, "ftol",
135  "A parameter used in Armijo condition",
137  SG_ADD(&m_wolfe, "wolfe",
138  "A parameter used in curvature condition",
140  SG_ADD(&m_gtol, "gtol",
141  "A parameter used in Morethuente linesearch to control the accuracy",
143  SG_ADD(&m_xtol, "xtol",
144  "The machine precision for floating-point values",
146  SG_ADD(&m_orthantwise_c, "orthantwise_c",
147  "Coeefficient for the L1 norm of variables",
149  SG_ADD(&m_orthantwise_start, "orthantwise_start",
150  "Start index for computing L1 norm of the variables",
152  SG_ADD(&m_orthantwise_end, "orthantwise_end",
153  "End index for computing L1 norm of the variables",
155  SG_ADD(&m_enable_newton_if_fail, "enable_newton_if_fail",
156  "Enable the original Newton method if the L-BFGS method fails",
158 }
159 
161 {
162 }
163 
/* Objective callback that is handed to lbfgs() later in this file: computes
 * the objective value psi and its gradient at the point alpha.
 *
 * @param obj opaque instance pointer; cast back to
 *        CSingleLaplacianInferenceMethodWithLBFGS * with static_cast
 * @param alpha current point; read-only for the caller, const_cast away
 *        because both helper methods take a non-const float64_t *
 * @param gradient output buffer, forwarded to get_gradient_wrt_alpha
 * @param dim number of entries, forwarded to both helper methods
 * @param step not used by the visible body
 * @return psi as filled in by get_psi_wrt_alpha
 */
164 float64_t CSingleLaplacianInferenceMethodWithLBFGS::evaluate(
165  void *obj,
166  const float64_t *alpha,
167  float64_t *gradient,
168  const int dim,
169  const float64_t step)
170 {
171  /* Note that alpha = alpha_pre_iter - step * gradient_pre_iter */
172 
173  /* Unfortunately we cannot use dynamic_cast to cast the void * pointer to an
174  * object pointer. Therefore, make sure this method is private.
175  */
/* NOTE(review): the declaration of obj_prt (listing line 176) is absent from
 * this listing; only the initializer on the next line is visible.
 */
177  = static_cast<CSingleLaplacianInferenceMethodWithLBFGS *>(obj);
178  float64_t * alpha_cast = const_cast<float64_t *>(alpha);
179  float64_t psi = 0.0;
180  obj_prt->get_psi_wrt_alpha(alpha_cast, dim, psi);
181  obj_prt->get_gradient_wrt_alpha(alpha_cast, gradient, dim);
182  return psi;
183 }
184 
186 {
187  float64_t psi_new;
188  float64_t psi_def;
189 
190  /* get mean vector and create eigen representation of it*/
192  Eigen::Map<Eigen::VectorXd> eigen_mean_f(mean_f.vector, mean_f.vlen);
193 
194  /* create eigen representation of kernel matrix*/
197  m_ktrtr.num_cols);
198 
199  /* create shogun and eigen representation of function vector*/
200  m_mu = SGVector<float64_t>(mean_f.vlen);
202 
204  {
205  /* set alpha to a zero vector */
207  m_alpha.zero();
208 
209  /* f = mean, if the length of alpha and the length of y do not match */
210  eigen_mu = eigen_mean_f;
211  psi_new = -SGVector<float64_t>::sum(
213  }
214  else
215  {
216  /* compute f = K * alpha + m*/
218  eigen_mu = eigen_ktrtr * (eigen_alpha * CMath::sq(m_scale)) + eigen_mean_f;
219  psi_new = eigen_alpha.dot(eigen_mu - eigen_mean_f) / 2.0;
221 
222  psi_def = -SGVector<float64_t>::sum(
224 
225  /* if default is better, then use it*/
226  if (psi_def < psi_new)
227  {
228  m_alpha.zero();
229  eigen_mu = eigen_mean_f;
230  psi_new = psi_def;
231  }
232  }
234  lbfgs_parameter_t lbfgs_param;
235  lbfgs_param.m = m_m;
236  lbfgs_param.max_linesearch = m_max_linesearch;
237  lbfgs_param.linesearch = m_linesearch;
238  lbfgs_param.max_iterations = m_max_iterations;
239  lbfgs_param.delta = m_delta;
240  lbfgs_param.past = m_past;
241  lbfgs_param.epsilon = m_epsilon;
242  lbfgs_param.min_step = m_min_step;
243  lbfgs_param.max_step = m_max_step;
244  lbfgs_param.ftol = m_ftol;
245  lbfgs_param.wolfe = m_wolfe;
246  lbfgs_param.gtol = m_gtol;
247  lbfgs_param.xtol = m_xtol;
248  lbfgs_param.orthantwise_c = m_orthantwise_c;
249  lbfgs_param.orthantwise_start = m_orthantwise_start;
250  lbfgs_param.orthantwise_end = m_orthantwise_end;
251 
252  /* use for passing variables to compute function value and gradient*/
253  m_mean_f = &mean_f;
254 
255  /* In order to use the provided lbfgs function, we have to pass the object via
256  * a void * pointer, which the evaluate method casts back to an object
257  * pointer with static_cast.
258  * Therefore, make sure the evaluate method is a private method of the class.
259  * Because the evaluate method is defined in a class, we have to pass the
260  * method pointer to the lbfgs function via a static method.
261  * If we also use the progress method, make sure the method is static and
262  * private.
263  */
264  void * obj_prt = static_cast<void *>(this);
265 
266  int ret = lbfgs(m_alpha.vlen, m_alpha.vector, &psi_new,
267  CSingleLaplacianInferenceMethodWithLBFGS::evaluate,
268  NULL, obj_prt, &lbfgs_param);
269  /* clean up*/
270  m_mean_f = NULL;
271 
272  /* Note that ret should be zero if the minimization
273  * process terminates without an error.
274  * A non-zero value indicates an error.
275  */
276  if (m_enable_newton_if_fail && ret != 0 && ret != LBFGS_ALREADY_MINIMIZED)
277  {
278  /* If some error happened during the L-BFGS optimization, we use the original
279  * Newton method.
280  */
281  SG_WARNING("Error during L-BFGS optimization, using original Newton method as fallback\n");
283  return;
284  }
285 
286  /* compute f = K * alpha + m*/
287  eigen_mu = eigen_ktrtr * (eigen_alpha * CMath::sq(m_scale)) + eigen_mean_f;
288 
289  /* get log probability derivatives*/
293 
294  /* W = -d2lp*/
295  m_W = m_d2lp.clone();
296  m_W.scale(-1.0);
297 
298  /* compute sW*/
300  /* create shogun and eigen representation of sW*/
303 
304  if (eigen_W.minCoeff() > 0)
305  eigen_sW = eigen_W.cwiseSqrt();
306  else
307  eigen_sW.setZero();
308 }
309 
/* Computes the objective value psi for a given alpha and stores it in psi.
 *
 * @param alpha pointer to dim values; only read by the visible code
 * @param dim number of entries in alpha; also the length of the temporary f
 * @param psi output reference that receives the objective value
 *
 * Dereferences m_mean_f, so that pointer must reference a valid vector when
 * this method runs; the code that calls lbfgs() in this file assigns it just
 * before the call and resets it to NULL afterwards.
 *
 * NOTE(review): the declaration of the `kernel` map (listing lines 318-319)
 * and listing line 329 are absent from this listing; only the tail
 * "m_ktrtr.num_cols);" of the former is visible.
 */
310 void CSingleLaplacianInferenceMethodWithLBFGS::get_psi_wrt_alpha(
311  float64_t *alpha,
312  const int dim,
313  float64_t &psi)
314 {
315  Eigen::Map<Eigen::VectorXd> eigen_alpha(alpha, dim);
316  SGVector<float64_t> f(dim);
317  Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
320  m_ktrtr.num_cols);
321  Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f->vector,
322  m_mean_f->vlen);
323  /* f = K * alpha * scale^2 + mean_f for the given alpha */
324  eigen_f
325  = kernel * ((eigen_alpha) * CMath::sq(m_scale)) + eigen_mean_f;
326 
327  /* psi = 0.5 * alpha' * (f - mean_f) - sum(dlp); the statement below computes
 * the first term only -- NOTE(review): the sum term is presumably applied on
 * listing line 329, which is absent from this listing -- confirm
 */
328  psi = eigen_alpha.dot(eigen_f - eigen_mean_f) * 0.5;
330 }
331 
/* Computes the gradient of the objective with respect to alpha and writes it
 * into the caller-supplied buffer.
 *
 * @param alpha pointer to dim values; only read by the visible code
 * @param gradient output buffer of dim values, written through an Eigen map
 * @param dim number of entries in alpha and gradient
 *
 * Dereferences m_mean_f, so that pointer must reference a valid vector when
 * this method runs; the code that calls lbfgs() in this file assigns it just
 * before the call and resets it to NULL afterwards.
 *
 * NOTE(review): the declaration of the `kernel` map (listing lines 341-342)
 * and the initializer of dlp_f (listing line 351) are absent from this
 * listing.
 */
332 void CSingleLaplacianInferenceMethodWithLBFGS::get_gradient_wrt_alpha(
333  float64_t *alpha,
334  float64_t *gradient,
335  const int dim)
336 {
337  Eigen::Map<Eigen::VectorXd> eigen_alpha(alpha, dim);
338  Eigen::Map<Eigen::VectorXd> eigen_gradient(gradient, dim);
339  SGVector<float64_t> f(dim);
340  Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
343  m_ktrtr.num_cols);
344  Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f->vector,
345  m_mean_f->vlen);
346 
347  /* f = K * alpha * scale^2 + mean_f for the given alpha */
348  eigen_f = kernel * ((eigen_alpha) * CMath::sq(m_scale)) + eigen_mean_f;
349 
350  SGVector<float64_t> dlp_f =
352 
353  Eigen::Map<Eigen::VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);
354 
355  /* g_alpha = K * (alpha - dlp_f) * scale^2 */
356  eigen_gradient = kernel * ((eigen_alpha - eigen_dlp_f) * CMath::sq(m_scale));
357 }
358 
359 } /* namespace shogun */
360 #endif /* HAVE_EIGEN3 */
virtual SGVector< float64_t > get_log_probability_f(const CLabels *lab, SGVector< float64_t > func) const =0
SGVector< float64_t > m_alpha
int32_t lbfgs(int32_t n, float64_t *x, float64_t *ptr_fx, lbfgs_evaluate_t proc_evaluate, lbfgs_progress_t proc_progress, void *instance, lbfgs_parameter_t *_param, lbfgs_adjust_step_t proc_adjust_step)
Definition: lbfgs.cpp:208
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
virtual void set_lbfgs_parameters(int m=100, int max_linesearch=1000, int linesearch=LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE, int max_iterations=1000, float64_t delta=0.0, int past=0, float64_t epsilon=1e-5, float64_t min_step=1e-20, float64_t max_step=1e+20, float64_t ftol=1e-4, float64_t wolfe=0.9, float64_t gtol=0.9, float64_t xtol=1e-16, float64_t orthantwise_c=0.0, int orthantwise_start=0, int orthantwise_end=1)
static T sq(T x)
x^2
Definition: Math.h:395
float64_t orthantwise_c
Definition: lbfgs.h:311
index_t num_cols
Definition: SGMatrix.h:331
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
An abstract class of the mean function.
Definition: MeanFunction.h:28
void scale(T alpha)
Scale vector inplace.
Definition: SGVector.cpp:956
index_t num_rows
Definition: SGMatrix.h:329
static const float64_t epsilon
Definition: libbmrm.cpp:24
index_t vlen
Definition: SGVector.h:637
The SingleLaplace approximation inference method class for regression and binary Classification.
double float64_t
Definition: common.h:50
static T sum(T *vec, int32_t len)
Return sum(vec)
Definition: SGVector.h:494
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
The Laplace approximation inference method with LBFGS class for regression and binary classification...
SGVector< T > clone() const
Definition: SGVector.cpp:278
virtual SGVector< float64_t > get_log_probability_derivative_f(const CLabels *lab, SGVector< float64_t > func, index_t i) const =0
The Kernel base class.
Definition: Kernel.h:153
#define SG_WARNING(...)
Definition: SGIO.h:128
#define SG_ADD(...)
Definition: SGObject.h:81
#define delta
Definition: sfa.cpp:23
The Likelihood model base class.
SGMatrix< float64_t > m_ktrtr
CLikelihoodModel * m_model

SHOGUN 机器学习工具包 - 项目文档