Commit fbb3a219 authored by davidkep

Improve the linearized ADMM optimizer for weighted LS

parent dd82d05d
@@ -9,6 +9,8 @@
#ifndef NSOPTIM_OBJECTIVE_LS_LOSS_HPP_
#define NSOPTIM_OBJECTIVE_LS_LOSS_HPP_
#include <algorithm>
#include <nsoptim/armadillo.hpp>
#include <nsoptim/objective/convex.hpp>
#include <nsoptim/container/data.hpp>
@@ -62,7 +64,7 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
const bool include_intercept = true) noexcept
: include_intercept_(include_intercept), data_(data), mean_weight_(arma::mean(*weights)),
sqrt_weights_(std::make_shared<const arma::vec>(arma::sqrt(*weights / mean_weight_))),
weighted_pred_norm_(ls_loss::TwoNormUpper(data->cx(), *sqrt_weights_)) {}
weighted_pred_norm_(-1) {}
WeightedLsLoss(std::shared_ptr<const PredictorResponseData> data, const arma::vec& weights,
const bool include_intercept = true) noexcept
@@ -116,6 +118,25 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
//! @return the difference between `x` and `y`.
template<typename T>
double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) const {
const double weighted_pred_norm = (weighted_pred_norm_ < 0) ? ls_loss::TwoNormUpper(data_->cx(), *sqrt_weights_) :
weighted_pred_norm_;
return std::sqrt(sqrt_weights_->n_elem * mean_weight_) * std::abs(x.intercept - y.intercept) +
weighted_pred_norm * arma::norm(x.beta - y.beta, 2);
}
//! Get the difference between two sets of regression coefficients.
//!
//! For the weighted LS loss, the difference approximates the 2-norm of the change in weighted predictions via the bound
//! ||W . X . (beta1 - beta2) + w (mu1 - mu2)||_2 <= |mu1 - mu2| sqrt(sum(w)) + ||W X|| ||beta1 - beta2||_2
//!
//! @param x a set of regression coefficients.
//! @param y the other set of regression coefficients.
//! @return the difference between `x` and `y`.
template<typename T>
double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) {
if (weighted_pred_norm_ < 0) {
weighted_pred_norm_ = ls_loss::TwoNormUpper(data_->cx(), *sqrt_weights_);
}
return std::sqrt(sqrt_weights_->n_elem * mean_weight_) * std::abs(x.intercept - y.intercept) +
weighted_pred_norm_ * arma::norm(x.beta - y.beta, 2);
}
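
With the constructor now initializing `weighted_pred_norm_` to -1, the expensive `TwoNormUpper` call is deferred until a `Difference` is actually requested: the `const` overload recomputes the norm whenever nothing has been cached yet, while the non-`const` overload computes it once and stores it. A minimal, self-contained sketch of this lazy-caching pattern, using an illustrative class and Armadillo's built-in matrix 2-norm instead of the library's `TwoNormUpper`:

```cpp
#include <utility>
#include <armadillo>

// Illustrative only: caches an expensive norm of the predictor matrix on first use.
class LazyNormExample {
 public:
  explicit LazyNormExample(arma::mat x) : x_(std::move(x)), cached_norm_(-1) {}

  // Const overload: cannot touch the cache, so it recomputes if nothing is cached yet.
  double Norm() const {
    return (cached_norm_ < 0) ? arma::norm(x_, 2) : cached_norm_;
  }

  // Non-const overload: computes the norm once and reuses it on subsequent calls.
  double Norm() {
    if (cached_norm_ < 0) {
      cached_norm_ = arma::norm(x_, 2);
    }
    return cached_norm_;
  }

 private:
  arma::mat x_;
  double cached_norm_;  // -1 acts as the "not yet computed" marker, as in the diff above.
};
```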
@@ -137,13 +158,13 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
template<typename T>
GradientType<T> Gradient(const RegressionCoefficients<T>& coefs) const {
if (include_intercept_) {
const arma::vec neg_weighted_residuals = mean_weight_ * (*sqrt_weights_) % (*sqrt_weights_) %
const arma::vec neg_weighted_residuals = mean_weight_ * arma::square(*sqrt_weights_) %
(data_->cx() * coefs.beta + coefs.intercept - data_->cy());
return GradientType<T>(arma::mean(neg_weighted_residuals),
arma::mean(data_->cx().each_col() % neg_weighted_residuals, 0));
}
const arma::vec neg_weighted_residuals = mean_weight_ * (*sqrt_weights_) % (*sqrt_weights_) %
const arma::vec neg_weighted_residuals = mean_weight_ * arma::square(*sqrt_weights_) %
(data_->cx() * coefs.beta - data_->cy());
return GradientType<T>(0, -arma::mean(data_->cx().each_col() % neg_weighted_residuals, 0));
}
@@ -158,8 +179,8 @@ class WeightedLsLoss : public LossFunction<PredictorResponseData>,
//! Access the un-normalized weights used by this weighted LS loss function.
//!
//! @return a vector of weights.
const WeightsType weights() const noexcept {
return mean_weight_ * (*sqrt_weights_) % (*sqrt_weights_);
WeightsType weights() const noexcept {
return mean_weight_ * arma::square(*sqrt_weights_);
}
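
The switch to `arma::square(*sqrt_weights_)` in the gradient and in `weights()` above is purely a notational clean-up; in both places the expression recovers the original, un-normalized weights, because the constructor stores `mean_weight_ = mean(w)` and `sqrt_weights_ = sqrt(w / mean(w))`:

```latex
\[
  \text{mean\_weight} \cdot \Bigl(\sqrt{w_i / \operatorname{mean}(w)}\Bigr)^{2}
  = \operatorname{mean}(w) \cdot \frac{w_i}{\operatorname{mean}(w)} = w_i .
\]
```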
//! Access the normalized sqrt-weights used by this weighted LS loss function.
@@ -213,8 +234,7 @@ class LsLoss : public LossFunction<PredictorResponseData>,
struct is_ls_loss_tag {};
explicit LsLoss(std::shared_ptr<const PredictorResponseData> data, const bool include_intercept = true)
: include_intercept_(include_intercept), data_(data),
pred_norm_((data_->n_pred() < data_->n_obs()) ? arma::norm(data->cx(), "inf") : arma::norm(data->cx(), 1)) {}
: include_intercept_(include_intercept), data_(data), pred_norm_(-1) {}
LsLoss(const LsLoss& other) = default;
LsLoss(LsLoss&& other) = default;
@@ -268,6 +288,25 @@ class LsLoss : public LossFunction<PredictorResponseData>,
//! @return the relative difference between `x` and `y`.
template<typename T>
double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) const {
const double pred_norm = (pred_norm_ < 0) ? std::min(arma::norm(data_->cx(), "inf"), arma::norm(data_->cx(), 1)) :
pred_norm_;
return std::sqrt(data_->n_obs()) * std::abs(x.intercept - y.intercept) +
pred_norm * arma::norm(x.beta - y.beta, 2);
}
//! Get the difference between two sets of regression coefficients.
//!
//! For the LS loss, the difference approximates the 2-norm of the change in predictions via the bound
//! ||X . (beta1 - beta2) + (mu1 - mu2)||_2 <= |mu1 - mu2| sqrt(n) + ||X|| ||beta1 - beta2||_2
//!
//! @param x a set of regression coefficients.
//! @param y the other set of regression coefficients.
//! @return the relative difference between `x` and `y`.
template<typename T>
double Difference(const RegressionCoefficients<T>& x, const RegressionCoefficients<T>& y) {
if (pred_norm_ < 0) {
pred_norm_ = std::min(arma::norm(data_->cx(), "inf"), arma::norm(data_->cx(), 1));
}
return std::sqrt(data_->n_obs()) * std::abs(x.intercept - y.intercept) +
pred_norm_ * arma::norm(x.beta - y.beta, 2);
}
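
The bound quoted in the doc comment follows from the triangle inequality and the definition of the matrix 2-norm; a worked version, with 1 denoting the all-ones vector and n the number of observations:

```latex
\[
  \bigl\| X(\beta_1 - \beta_2) + \mathbf{1}(\mu_1 - \mu_2) \bigr\|_2
  \le \bigl\| \mathbf{1}(\mu_1 - \mu_2) \bigr\|_2 + \bigl\| X(\beta_1 - \beta_2) \bigr\|_2
  = |\mu_1 - \mu_2| \sqrt{n} + \bigl\| X(\beta_1 - \beta_2) \bigr\|_2
  \le |\mu_1 - \mu_2| \sqrt{n} + \| X \|_2 \, \| \beta_1 - \beta_2 \|_2 .
\]
```

The code then plugs in the cheaper min(||X||_1, ||X||_inf) for ||X||_2, so the returned value should be read as an inexpensive surrogate for this bound rather than a guaranteed upper bound.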
......
This diff is collapsed.
@@ -100,12 +100,14 @@ class DataProxy {
//! @param loss the new loss function.
//! @return information on what data changed.
DataChanges Update(const LossFunction& loss) noexcept {
if (data_ != &loss.data()) {
// if (data_ != &loss.data() ||
// (data_ && (data_->n_obs() != loss.data().n_obs() || data_->n_pred() != loss.data().n_pred()))) {
// data_ = &loss.data();
// return DataChanges {true, 0};
// }
data_ = &loss.data();
return DataChanges {true, 0};
}
return DataChanges {false, 0};
}
//! Access the data set.
//!
@@ -165,23 +167,25 @@ class DataProxy<LossFunction, std::true_type> {
//! @return information on what data changed.
DataChanges Update(const LossFunction& loss) {
// Check if it's actually new data and/or new weights
DataChanges changes {data_ != &loss.data(), 0};
if (sqrt_weights_ != &loss.sqrt_weights()) {
if (sqrt_weights_ && (sqrt_weights_->n_elem == loss.sqrt_weights().n_elem)) {
// Check by how much the weights changed. If the change is small enough, the caller does not need to know.
const double diff_norm = arma::norm(loss.sqrt_weights() - *sqrt_weights_);
if (diff_norm * diff_norm < sqrt_weights_->n_elem * _optim_dal_internal::kMaximumRelativeWeightChange) {
changes.weights_changed = 1;
} else {
changes.weights_changed = 2;
}
sqrt_weights_ = &loss.sqrt_weights();
} else {
// The dimensions of the weights vector changed.
changes.weights_changed = 2;
}
}
// DataChanges changes {data_ != &loss.data() || (data_ && (data_->n_obs() != loss.data().n_obs() ||
// data_->n_pred() != loss.data().n_pred())), 0};
// if (sqrt_weights_ != &loss.sqrt_weights()) {
// if (sqrt_weights_ && (sqrt_weights_->n_elem == loss.sqrt_weights().n_elem)) {
// // Check by how much the weights changed. If the change is small enough, the caller does not need to know.
// const double diff_norm = arma::norm(loss.sqrt_weights() - *sqrt_weights_);
// if (diff_norm * diff_norm < sqrt_weights_->n_elem * _optim_dal_internal::kMaximumRelativeWeightChange) {
// changes.weights_changed = 1;
// } else {
// changes.weights_changed = 2;
// }
// sqrt_weights_ = &loss.sqrt_weights();
// } else {
// // The dimensions of the weights vector changed.
// changes.weights_changed = 2;
// }
// }
DataChanges changes {true, 2};
if (changes.data_changed || changes.weights_changed) {
sqrt_weights_ = &loss.sqrt_weights();
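
Both `Update` methods now short-circuit the change detection: the plain proxy always reports `{true, 0}` and the weighted proxy always reports `{true, 2}`, with the finer-grained comparison kept only as commented-out code. For orientation, here is a rough sketch of how a caller might act on a `DataChanges` value; the struct layout is inferred from the initializers in this diff, and the surrounding optimizer code is hypothetical:

```cpp
// Inferred from the diff: a flag for new data plus a 0/1/2 level for how much the weights changed.
struct DataChanges {
  bool data_changed;
  int weights_changed;  // 0: unchanged, 1: changed slightly, 2: changed substantially.
};

// Hypothetical caller: rebuild expensive cached state only when the proxy reports a change.
template <typename Proxy, typename Loss>
void ReactToUpdate(Proxy& proxy, const Loss& loss) {
  const DataChanges changes = proxy.Update(loss);
  if (changes.data_changed || changes.weights_changed == 2) {
    // Recompute everything that depends on X and the weights (e.g. cached factorizations).
  } else if (changes.weights_changed == 1) {
    // Weights moved only slightly: keep the factorizations and let the solver absorb it.
  }
}
```

With this commit, the proxies always report a full change, so callers effectively take the recompute-everything branch on every update.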
@@ -398,9 +402,11 @@ class Hessian {
// PCG did not converge. Recompute the preconditioner.
metric->AddDetail("step_dir_invert_hessian", 1);
metric->AddDetail("step_dir_pcg_iter", (pcg_iters < 0) ? static_cast<int>(gradient.n_elem) : pcg_iters);
const bool success = arma::inv_sympd(preconditioner_, hessian);
if (!arma::inv_sympd(preconditioner_, hessian)) {
return kPhiStepDirInversionFailed;
}
*step_dir = preconditioner_ * gradient;
return success ? kPhiStepDirFullInversion : kPhiStepDirInversionFailed;
return kPhiStepDirFullInversion;
} else {
metric->AddDetail("step_dir_pcg_iter", static_cast<int>(pcg_iters));
}
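
The last hunk tightens the fallback when PCG does not converge: if the full `arma::inv_sympd` also fails, the function now returns `kPhiStepDirInversionFailed` immediately instead of first forming a step direction from an invalid preconditioner. A stripped-down sketch of that pattern, with an illustrative function signature and status codes rather than the library's actual interface:

```cpp
#include <armadillo>

// Illustrative status codes standing in for kPhiStepDirFullInversion / kPhiStepDirInversionFailed.
enum StepDirStatus { kFullInversion, kInversionFailed };

// Hypothetical fallback: when PCG gave up, attempt a full SPD inversion of the Hessian
// and bail out cleanly if that fails as well.
StepDirStatus FullInversionFallback(const arma::mat& hessian, const arma::vec& gradient,
                                    arma::mat* preconditioner, arma::vec* step_dir) {
  if (!arma::inv_sympd(*preconditioner, hessian)) {
    return kInversionFailed;  // Hessian is not numerically symmetric positive definite.
  }
  *step_dir = (*preconditioner) * gradient;  // Newton-type step from the exact inverse.
  return kFullInversion;
}
```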
......