Commit fbb3a219 authored by davidkep

Improve the linearized ADMM optimizer for weighted LS

parent dd82d05d
@@ -9,6 +9,8 @@
 #ifndef NSOPTIM_OBJECTIVE_LS_LOSS_HPP_
 #define NSOPTIM_OBJECTIVE_LS_LOSS_HPP_
+#include <algorithm>
+
 #include <nsoptim/armadillo.hpp>
 #include <nsoptim/objective/convex.hpp>
 #include <nsoptim/container/data.hpp>
@@ -62,7 +64,7 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
                  const bool include_intercept = true) noexcept
     : include_intercept_(include_intercept), data_(data), mean_weight_(arma::mean(*weights)),
       sqrt_weights_(std::make_shared<const arma::vec>(arma::sqrt(*weights / mean_weight_))),
-      weighted_pred_norm_(ls_loss::TwoNormUpper(data->cx(), *sqrt_weights_)) {}
+      weighted_pred_norm_(-1) {}
   WeightedLsLoss(std::shared_ptr<const PredictorResponseData> data, const arma::vec& weights,
                  const bool include_intercept = true) noexcept
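Both constructors now seed the cached norm with the sentinel value -1 instead of calling `ls_loss::TwoNormUpper` eagerly, deferring the computation until `Difference()` first needs it (see the hunks below). A minimal sketch of this sentinel-based lazy caching, with a hypothetical `ExpensiveNorm` standing in for `ls_loss::TwoNormUpper`:

```cpp
#include <utility>
#include <armadillo>

// Hypothetical stand-in for ls_loss::TwoNormUpper: any norm that is too
// expensive to compute in a constructor that may never need it.
double ExpensiveNorm(const arma::mat& x) {
  return arma::norm(x, 2);  // SVD-based 2-norm
}

class LazyNormCache {
 public:
  explicit LazyNormCache(arma::mat data)
      : data_(std::move(data)), cached_norm_(-1) {}  // -1 means "not yet computed"

  // Non-const overload: computes the norm once and caches it for later calls.
  double Norm() {
    if (cached_norm_ < 0) {
      cached_norm_ = ExpensiveNorm(data_);
    }
    return cached_norm_;
  }

  // Const overload: cannot fill the cache, so it recomputes when cold.
  double Norm() const {
    return (cached_norm_ < 0) ? ExpensiveNorm(data_) : cached_norm_;
  }

 private:
  arma::mat data_;
  double cached_norm_;
};
```

The same const/non-const pairing shows up in the `Difference()` overloads added below.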
@@ -116,6 +118,25 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
   //! @return the difference between `x` and `y`.
   template<typename T>
   double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) const {
+    const double weighted_pred_norm = (weighted_pred_norm_ < 0) ?
+        ls_loss::TwoNormUpper(data_->cx(), *sqrt_weights_) : weighted_pred_norm_;
+    return std::sqrt(sqrt_weights_->n_elem * mean_weight_) * std::abs(x.intercept - y.intercept) +
+        weighted_pred_norm * arma::norm(x.beta - y.beta, 2);
+  }
+
+  //! Get the difference between two sets of regression coefficients.
+  //!
+  //! For the weighted LS loss, the difference is an upper bound on the 2-norm of the matrix-vector product:
+  //!   ||W X (beta1 - beta2) + w (mu1 - mu2)||_2 <= |mu1 - mu2| sqrt(sum(w)) + ||W X|| ||beta1 - beta2||_2
+  //!
+  //! @param x a set of regression coefficients.
+  //! @param y the other set of regression coefficients.
+  //! @return the difference between `x` and `y`.
+  template<typename T>
+  double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) {
+    if (weighted_pred_norm_ < 0) {
+      weighted_pred_norm_ = ls_loss::TwoNormUpper(data_->cx(), *sqrt_weights_);
+    }
     return std::sqrt(sqrt_weights_->n_elem * mean_weight_) * std::abs(x.intercept - y.intercept) +
         weighted_pred_norm_ * arma::norm(x.beta - y.beta, 2);
   }
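For reference, the inequality in the new doc comment is just the triangle inequality plus the definition of the operator 2-norm. Writing $W = \operatorname{diag}(\sqrt{w_i})$, $\mathbf{1}$ for the all-ones vector, and $\bar{w}$ for the mean weight (the notation is mine; the source only states the final bound):

```latex
\|WX(\beta_1-\beta_2) + W\mathbf{1}(\mu_1-\mu_2)\|_2
  \le |\mu_1-\mu_2|\,\|W\mathbf{1}\|_2 + \|WX\|_2\,\|\beta_1-\beta_2\|_2,
\qquad
\|W\mathbf{1}\|_2 = \sqrt{\textstyle\sum_i w_i} = \sqrt{n\,\bar{w}}.
```

The code evaluates $\sqrt{n\,\bar{w}}$ as `std::sqrt(sqrt_weights_->n_elem * mean_weight_)` (recall `sqrt_weights_` stores $\sqrt{w/\bar{w}}$), and substitutes the cheap upper bound from `ls_loss::TwoNormUpper` for the exact $\|WX\|_2$.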
@@ -137,13 +158,13 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
   template<typename T>
   GradientType<T> Gradient(const RegressionCoefficients<T>& coefs) const {
     if (include_intercept_) {
-      const arma::vec neg_weighted_residuals = mean_weight_ * (*sqrt_weights_) % (*sqrt_weights_) %
+      const arma::vec neg_weighted_residuals = mean_weight_ * arma::square(*sqrt_weights_) %
           (data_->cx() * coefs.beta + coefs.intercept - data_->cy());
       return GradientType<T>(arma::mean(neg_weighted_residuals),
                              arma::mean(data_->cx().each_col() % neg_weighted_residuals, 0));
     }
-    const arma::vec neg_weighted_residuals = mean_weight_ * (*sqrt_weights_) % (*sqrt_weights_) %
+    const arma::vec neg_weighted_residuals = mean_weight_ * arma::square(*sqrt_weights_) %
         (data_->cx() * coefs.beta - data_->cy());
     return GradientType<T>(0, -arma::mean(data_->cx().each_col() % neg_weighted_residuals, 0));
   }
@@ -158,8 +179,8 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
   //! Access the un-normalized weights used by this weighted LS loss function.
   //!
   //! @return a vector of weights.
-  const WeightsType weights() const noexcept {
-    return mean_weight_ * (*sqrt_weights_) % (*sqrt_weights_);
+  WeightsType weights() const noexcept {
+    return mean_weight_ * arma::square(*sqrt_weights_);
   }

   //! Access the normalized sqrt-weights used by this weighted LS loss function.
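Here and in the gradient above, the elementwise product `(*sqrt_weights_) % (*sqrt_weights_)` becomes `arma::square(*sqrt_weights_)`; the two are equivalent, the latter just states the intent. The accessor recovers the original weights because the class stores the normalized square roots:

```latex
\bar{w}\,\bigl(\sqrt{w_i/\bar{w}}\bigr)^2 = w_i .
```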
@@ -213,8 +234,7 @@ class LsLoss : public LossFunction<PredictorResponseData>,
   struct is_ls_loss_tag {};

   explicit LsLoss(std::shared_ptr<const PredictorResponseData> data, const bool include_intercept = true)
-    : include_intercept_(include_intercept), data_(data),
-      pred_norm_((data_->n_pred() < data_->n_obs()) ? arma::norm(data->cx(), "inf") : arma::norm(data->cx(), 1)) {}
+    : include_intercept_(include_intercept), data_(data), pred_norm_(-1) {}

   LsLoss(const LsLoss& other) = default;
   LsLoss(LsLoss&& other) = default;
@@ -268,6 +288,25 @@ class LsLoss : public LossFunction<PredictorResponseData>,
   //! @return the relative difference between `x` and `y`.
   template<typename T>
   double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) const {
+    const double pred_norm = (pred_norm_ < 0) ?
+        std::min(arma::norm(data_->cx(), "inf"), arma::norm(data_->cx(), 1)) : pred_norm_;
+    return std::sqrt(data_->n_obs()) * std::abs(x.intercept - y.intercept) +
+        pred_norm * arma::norm(x.beta - y.beta, 2);
+  }
+
+  //! Get the difference between two sets of regression coefficients.
+  //!
+  //! For the LS loss, the difference approximates the 2-norm of the matrix-vector product:
+  //!   ||X (beta1 - beta2) + (mu1 - mu2)||_2 <= |mu1 - mu2| sqrt(n) + ||X|| ||beta1 - beta2||_2
+  //!
+  //! @param x a set of regression coefficients.
+  //! @param y the other set of regression coefficients.
+  //! @return the relative difference between `x` and `y`.
+  template<typename T>
+  double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) {
+    if (pred_norm_ < 0) {
+      pred_norm_ = std::min(arma::norm(data_->cx(), "inf"), arma::norm(data_->cx(), 1));
+    }
     return std::sqrt(data_->n_obs()) * std::abs(x.intercept - y.intercept) +
         pred_norm_ * arma::norm(x.beta - y.beta, 2);
   }
......
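Like the weighted variant, `LsLoss` now caches `pred_norm_` lazily, and it replaces the old dimension-dependent choice between `arma::norm(X, "inf")` and `arma::norm(X, 1)` with the smaller of the two. As context for why these norms are a reasonable stand-in for the expensive spectral norm, recall the standard interpolation bound (a known fact, not stated in the source):

```latex
\|X\|_2 \le \sqrt{\|X\|_1\,\|X\|_\infty},
```

where $\|X\|_1$ is the maximum absolute column sum and $\|X\|_\infty$ the maximum absolute row sum, both computable in $O(np)$. Note that $\min(\|X\|_1, \|X\|_\infty)$ alone is not a guaranteed upper bound on $\|X\|_2$, which is presumably why the doc comment calls the result an approximation.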
@@ -100,11 +100,13 @@ class DataProxy {
   //! @param the new loss function.
   //! @return information on what data changed.
   DataChanges Update(const LossFunction& loss) noexcept {
-    if (data_ != &loss.data()) {
-      data_ = &loss.data();
-      return DataChanges {true, 0};
-    }
-    return DataChanges {false, 0};
+    // if (data_ != &loss.data() ||
+    //     (data_ && (data_->n_obs() != loss.data().n_obs() || data_->n_pred() != loss.data().n_pred()))) {
+    //   data_ = &loss.data();
+    //   return DataChanges {true, 0};
+    // }
+    data_ = &loss.data();
+    return DataChanges {true, 0};
   }

   //! Access the data set.
@@ -165,23 +167,25 @@ class DataProxy<LossFunction, std::true_type> {
   //! @return information on what data changed.
   DataChanges Update(const LossFunction& loss) {
     // Check if it's actually new data and/or new weights
-    DataChanges changes {data_ != &loss.data(), 0};
-    if (sqrt_weights_ != &loss.sqrt_weights()) {
-      if (sqrt_weights_ && (sqrt_weights_->n_elem == loss.sqrt_weights().n_elem)) {
-        // Check by how much the weights changed. If the change is small enough, the caller does not need to know.
-        const double diff_norm = arma::norm(loss.sqrt_weights() - *sqrt_weights_);
-
-        if (diff_norm * diff_norm < sqrt_weights_->n_elem * _optim_dal_internal::kMaximumRelativeWeightChange) {
-          changes.weights_changed = 1;
-        } else {
-          changes.weights_changed = 2;
-        }
-        sqrt_weights_ = &loss.sqrt_weights();
-      } else {
-        // The dimensions of the weights vector changed.
-        changes.weights_changed = 2;
-      }
-    }
+    // DataChanges changes {data_ != &loss.data() || (data_ && (data_->n_obs() != loss.data().n_obs() ||
+    //                      data_->n_pred() != loss.data().n_pred())), 0};
+    // if (sqrt_weights_ != &loss.sqrt_weights()) {
+    //   if (sqrt_weights_ && (sqrt_weights_->n_elem == loss.sqrt_weights().n_elem)) {
+    //     // Check by how much the weights changed. If the change is small enough, the caller does not need to know.
+    //     const double diff_norm = arma::norm(loss.sqrt_weights() - *sqrt_weights_);
+
+    //     if (diff_norm * diff_norm < sqrt_weights_->n_elem * _optim_dal_internal::kMaximumRelativeWeightChange) {
+    //       changes.weights_changed = 1;
+    //     } else {
+    //       changes.weights_changed = 2;
+    //     }
+    //     sqrt_weights_ = &loss.sqrt_weights();
+    //   } else {
+    //     // The dimensions of the weights vector changed.
+    //     changes.weights_changed = 2;
+    //   }
+    // }
+    DataChanges changes {true, 2};
     if (changes.data_changed || changes.weights_changed) {
       sqrt_weights_ = &loss.sqrt_weights();
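The commented-out branch graded a change in the sqrt-weights as minor (1) or major (2) by comparing the squared norm of the change against a per-element tolerance. A self-contained sketch of that test; the constant's value here is a placeholder for `_optim_dal_internal::kMaximumRelativeWeightChange`, whose actual value is not shown in this diff:

```cpp
#include <armadillo>

// Placeholder for _optim_dal_internal::kMaximumRelativeWeightChange
// (actual value not visible in this diff).
constexpr double kMaxRelWeightChange = 1e-8;

// Mirrors the weights_changed grades used by DataChanges:
// 1 = change small enough for callers to keep cached state, 2 = full refresh.
int ClassifyWeightChange(const arma::vec& old_sqrt_w, const arma::vec& new_sqrt_w) {
  if (old_sqrt_w.n_elem != new_sqrt_w.n_elem) {
    return 2;  // dimensions changed; nothing cached can be reused
  }
  const double diff_norm = arma::norm(new_sqrt_w - old_sqrt_w);
  // Average squared change per element below the tolerance counts as minor.
  return (diff_norm * diff_norm < new_sqrt_w.n_elem * kMaxRelWeightChange) ? 1 : 2;
}
```

The commit disables this classification and reports `{true, 2}` unconditionally, forcing a full refresh on every update; presumably a conservative stopgap while the linearized ADMM optimizer is reworked.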
@@ -398,9 +402,11 @@ class Hessian {
       // PCG did not converge. Recompute the preconditioner.
       metric->AddDetail("step_dir_invert_hessian", 1);
       metric->AddDetail("step_dir_pcg_iter", (pcg_iters < 0) ? static_cast<int>(gradient.n_elem) : pcg_iters);
-      const bool success = arma::inv_sympd(preconditioner_, hessian);
+      if (!arma::inv_sympd(preconditioner_, hessian)) {
+        return kPhiStepDirInversionFailed;
+      }
       *step_dir = preconditioner_ * gradient;
-      return success ? kPhiStepDirFullInversion : kPhiStepDirInversionFailed;
+      return kPhiStepDirFullInversion;
     } else {
       metric->AddDetail("step_dir_pcg_iter", static_cast<int>(pcg_iters));
     }
......
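The rewritten fallback relies on the bool-returning overload of `arma::inv_sympd`, which signals failure through its return value instead of throwing, and it now returns before touching `step_dir` when the inversion fails (the old code multiplied with a potentially invalid `preconditioner_` first and only then reported the failure). A minimal, self-contained sketch of the pattern; the function and variable names are illustrative, not the library's:

```cpp
#include <armadillo>

// Solve H * step = g by explicit inversion, reporting failure through the
// return value (the bool-returning arma::inv_sympd does not throw on failure).
bool FullInversionStep(const arma::mat& hessian, const arma::vec& gradient,
                       arma::vec* step_dir) {
  arma::mat hessian_inv;
  if (!arma::inv_sympd(hessian_inv, hessian)) {
    return false;  // Hessian is not symmetric positive definite
  }
  *step_dir = hessian_inv * gradient;  // only reached with a valid inverse
  return true;
}

int main() {
  const arma::mat hessian = {{4.0, 1.0}, {1.0, 3.0}};
  const arma::vec gradient = {1.0, 2.0};
  arma::vec step;
  if (FullInversionStep(hessian, gradient, &step)) {
    step.print("step direction:");
  }
  return 0;
}
```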