Commit 49819291 authored by davidkep's avatar davidkep

Retain "good" solutions along the ENPY iterations.

parent ff5a2043
...@@ -13,7 +13,6 @@ export(mlocscale) ...@@ -13,7 +13,6 @@ export(mlocscale)
export(mscale) export(mscale)
export(pense) export(pense)
export(rho_function) export(rho_function)
export(s_algo_options)
export(tau_size) export(tau_size)
importFrom(Rcpp,evalCpp) importFrom(Rcpp,evalCpp)
importFrom(stats,mad) importFrom(stats,mad)
......
...@@ -4,23 +4,27 @@ ...@@ -4,23 +4,27 @@
#' @param max_it maximum number of PY iterations. #' @param max_it maximum number of PY iterations.
#' @param eps numerical tolerance to check for convergence. #' @param eps numerical tolerance to check for convergence.
#' @param keep_psc_proportion how many observations should be kept based on the Principal Sensitivity Components. #' @param keep_psc_proportion how many observations should be kept based on the Principal Sensitivity Components.
#' @param keep_residuals_measure how to determine how many observations to keep, based on their residuals. #' @param keep_residuals_measure how to determine what observations to keep, based on their residuals.
#' If `proportion`, a fixed number of observations is kept, while if `threshold`, #' If `proportion`, a fixed number of observations is kept, while if `threshold`,
#' only observations with residuals below a threshold are kept. #' only observations with residuals below the threshold are kept.
#' @param keep_residuals_proportion how many observations should be kept based on their residuals. #' @param keep_residuals_proportion how many observations should be kept based on their residuals.
#' @param keep_residuals_threshold only observations with (standardized) residuals less than this threshold are kept. #' @param keep_residuals_threshold only observations with (standardized) residuals less than this threshold are kept.
#' @param retain_best_factor in addition to the candidates from the last iteration, also keep candidates
#' that are within this factor of the best candidate.
#' #'
#' @return options for the ENPY algorithm. #' @return options for the ENPY algorithm.
#' @export #' @export
enpy_options <- function (max_it = 10, eps = 1e-9, keep_psc_proportion = 0.5, enpy_options <- function (max_it = 10, eps = 1e-6, keep_psc_proportion = 0.5,
keep_residuals_measure = c('proportion', 'threshold'), keep_residuals_measure = c('threshold', 'proportion'),
keep_residuals_proportion = 0.5, keep_residuals_threshold = 2) { keep_residuals_proportion = 0.5, keep_residuals_threshold = 2,
retain_best_factor = 1.1) {
list(max_it = as.integer(max_it[[1L]]), list(max_it = as.integer(max_it[[1L]]),
eps = as.numeric(eps[[1L]]), eps = as.numeric(eps[[1L]]),
keep_psc_proportion = as.numeric(keep_psc_proportion[[1L]]), keep_psc_proportion = as.numeric(keep_psc_proportion[[1L]]),
use_residual_threshold = match.arg(keep_residuals_measure) == 'threshold', use_residual_threshold = match.arg(keep_residuals_measure) == 'threshold',
keep_residuals_proportion = as.numeric(keep_residuals_proportion[[1L]]), keep_residuals_proportion = as.numeric(keep_residuals_proportion[[1L]]),
keep_residuals_threshold = as.numeric(keep_residuals_threshold[[1L]])) keep_residuals_threshold = as.numeric(keep_residuals_threshold[[1L]]),
retain_best_factor = as.numeric(retain_best_factor[[1L]]))
} }
#' Options for the M-estimation Algorithm #' Options for the M-estimation Algorithm
...@@ -30,7 +34,7 @@ enpy_options <- function (max_it = 10, eps = 1e-9, keep_psc_proportion = 0.5, ...@@ -30,7 +34,7 @@ enpy_options <- function (max_it = 10, eps = 1e-9, keep_psc_proportion = 0.5,
#' #'
#' @return options for the M-estimation algorithm. #' @return options for the M-estimation algorithm.
#' @export #' @export
mest_options <- function (max_it = 200, eps = 1e-8) { mest_options <- function (max_it = 200, eps = 1e-6) {
list(max_it = as.integer(max_it[[1L]]), list(max_it = as.integer(max_it[[1L]]),
eps = as.numeric(eps[[1L]])) eps = as.numeric(eps[[1L]]))
} }
...@@ -43,7 +47,7 @@ mest_options <- function (max_it = 200, eps = 1e-8) { ...@@ -43,7 +47,7 @@ mest_options <- function (max_it = 200, eps = 1e-8) {
#' @param eps numerical tolerance to check for convergence. #' @param eps numerical tolerance to check for convergence.
#' #'
#' @return options for the S-Estimate algorithm. #' @return options for the S-Estimate algorithm.
s_algo_options <- function (explore_it = 10, max_it = 500, eps = 1e-8, s_algo_options <- function (explore_it = 10, max_it = 500, eps = 1e-6,
tightening = c('none', 'adaptive', 'exponential'), tightening = c('none', 'adaptive', 'exponential'),
tightening_steps = 10) { tightening_steps = 10) {
list(max_it = as.integer(max_it[[1L]]), list(max_it = as.integer(max_it[[1L]]),
...@@ -80,7 +84,7 @@ s_algo_options <- function (explore_it = 10, max_it = 500, eps = 1e-8, ...@@ -80,7 +84,7 @@ s_algo_options <- function (explore_it = 10, max_it = 500, eps = 1e-8,
#' @return options for the ADMM EN algorithm. #' @return options for the ADMM EN algorithm.
#' @family EN algorithms #' @family EN algorithms
#' @export #' @export
en_admm_options <- function (max_it = 1000, eps = 1e-6, tau, sparse = FALSE, en_admm_options <- function (max_it = 1000, eps = 1e-9, tau, sparse = FALSE,
admm_type = c('auto', 'linearized', admm_type = c('auto', 'linearized',
'var-stepsize'), 'var-stepsize'),
tau_lower_mult = 0.01, tau_adjustment_lower = 0.98, tau_lower_mult = 0.01, tau_adjustment_lower = 0.98,
...@@ -112,7 +116,7 @@ en_admm_options <- function (max_it = 1000, eps = 1e-6, tau, sparse = FALSE, ...@@ -112,7 +116,7 @@ en_admm_options <- function (max_it = 1000, eps = 1e-6, tau, sparse = FALSE,
#' @return options for the DAL EN algorithm. #' @return options for the DAL EN algorithm.
#' @family EN algorithms #' @family EN algorithms
#' @export #' @export
en_dal_options <- function (max_it = 100, max_inner_it = 100, eps = 1e-9, eta_multiplier = 2, en_dal_options <- function (max_it = 100, max_inner_it = 100, eps = 1e-6, eta_multiplier = 2,
eta_start_conservative = 0.01, eta_start_aggressive = 1, eta_start_conservative = 0.01, eta_start_aggressive = 1,
lambda_relchange_aggressive = 0.25) { lambda_relchange_aggressive = 0.25) {
list(algorithm = 'dal', list(algorithm = 'dal',
......
#' ENPY Initial Estimates #' ENPY Initial Estimates
#' #'
#' @export #' @export
enpy_initial_estimates <- function (x, y, alpha, lambdas, bdp = 0.25, cc, enpy_initial_estimates <- function (x, y, alpha, lambdas, bdp = 0.25, cc, include_intercept = TRUE,
include_intercept = TRUE, enpy_opts = enpy_options(), mscale_maxit = 200, mscale_eps = 1e-9, en_options) {
enpy_opts = enpy_options(),
mscale_maxit = 200, mscale_eps = 1e-8,
en_options) {
if (missing(cc)) { if (missing(cc)) {
cc <- .bisquare_consistency_const(bdp) cc <- .bisquare_consistency_const(bdp)
} }
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#' @export #' @export
pense <- function(x, y, alpha, lambdas, cold_lambdas, penalty_loadings, pense <- function(x, y, alpha, lambdas, cold_lambdas, penalty_loadings,
additional_initial_estimates, include_intercept = TRUE, additional_initial_estimates, include_intercept = TRUE,
max_it = 200, eps = 1e-5, explore_it = 10, max_it = 200, eps = 1e-6, explore_it = 10,
tightening = c('none', 'adaptive', 'exponential'), tightening = c('none', 'adaptive', 'exponential'),
tightening_steps = 10L, en_algorithm_opts, tightening_steps = 10L, en_algorithm_opts,
mest_opts = mest_options(), enpy_opts = enpy_options()) { mest_opts = mest_options(), enpy_opts = enpy_options()) {
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
\alias{en_admm_options} \alias{en_admm_options}
\title{Options for the ADMM Elastic Net Algorithm} \title{Options for the ADMM Elastic Net Algorithm}
\usage{ \usage{
en_admm_options(max_it = 1000, eps = 1e-06, tau, sparse = FALSE, en_admm_options(max_it = 1000, eps = 1e-09, tau, sparse = FALSE,
admm_type = c("auto", "linearized", "var-stepsize"), admm_type = c("auto", "linearized", "var-stepsize"),
tau_lower_mult = 0.01, tau_adjustment_lower = 0.98, tau_lower_mult = 0.01, tau_adjustment_lower = 0.98,
tau_adjustment_upper = 0.999) tau_adjustment_upper = 0.999)
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
\alias{en_dal_options} \alias{en_dal_options}
\title{Options for the DAL Elastic Net Algorithm} \title{Options for the DAL Elastic Net Algorithm}
\usage{ \usage{
en_dal_options(max_it = 100, max_inner_it = 100, eps = 1e-09, en_dal_options(max_it = 100, max_inner_it = 100, eps = 1e-06,
eta_multiplier = 2, eta_start_conservative = 0.01, eta_multiplier = 2, eta_start_conservative = 0.01,
eta_start_aggressive = 1, lambda_relchange_aggressive = 0.25) eta_start_aggressive = 1, lambda_relchange_aggressive = 0.25)
} }
......
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
\title{ENPY Initial Estimates} \title{ENPY Initial Estimates}
\usage{ \usage{
enpy_initial_estimates(x, y, alpha, lambdas, bdp = 0.25, cc, enpy_initial_estimates(x, y, alpha, lambdas, bdp = 0.25, cc,
include_intercept = TRUE, enpy_opts = enpy_options(), mscale_maxit, include_intercept = TRUE, enpy_opts = enpy_options(),
mscale_eps, en_options) mscale_maxit = 200, mscale_eps = 1e-09, en_options)
} }
\description{ \description{
ENPY Initial Estimates ENPY Initial Estimates
......
...@@ -4,9 +4,10 @@ ...@@ -4,9 +4,10 @@
\alias{enpy_options} \alias{enpy_options}
\title{Options for the ENPY Algorithm} \title{Options for the ENPY Algorithm}
\usage{ \usage{
enpy_options(max_it = 10, eps = 1e-09, keep_psc_proportion = 0.5, enpy_options(max_it = 10, eps = 1e-06, keep_psc_proportion = 0.5,
keep_residuals_measure = c("proportion", "threshold"), keep_residuals_measure = c("threshold", "proportion"),
keep_residuals_proportion = 0.5, keep_residuals_threshold = 2) keep_residuals_proportion = 0.5, keep_residuals_threshold = 2,
retain_best_factor = 1.1)
} }
\arguments{ \arguments{
\item{max_it}{maximum number of PY iterations.} \item{max_it}{maximum number of PY iterations.}
...@@ -15,13 +16,16 @@ enpy_options(max_it = 10, eps = 1e-09, keep_psc_proportion = 0.5, ...@@ -15,13 +16,16 @@ enpy_options(max_it = 10, eps = 1e-09, keep_psc_proportion = 0.5,
\item{keep_psc_proportion}{how many observations should be kept based on the Principal Sensitivity Components.} \item{keep_psc_proportion}{how many observations should be kept based on the Principal Sensitivity Components.}
\item{keep_residuals_measure}{how to determine how many observations to keep, based on their residuals. \item{keep_residuals_measure}{how to determine what observations to keep, based on their residuals.
If \code{proportion}, a fixed number of observations is kept, while if \code{threshold}, If \code{proportion}, a fixed number of observations is kept, while if \code{threshold},
only observations with residuals below a threshold are kept.} only observations with residuals below the threshold are kept.}
\item{keep_residuals_proportion}{how many observations should be kept based on their residuals.} \item{keep_residuals_proportion}{how many observations should be kept based on their residuals.}
\item{keep_residuals_threshold}{only observations with (standardized) residuals less than this threshold are kept.} \item{keep_residuals_threshold}{only observations with (standardized) residuals less than this threshold are kept.}
\item{retain_best_factor}{in addition to the candidates from the last iteration, also keep candidates
that are within this factor of the best candidate.}
} }
\value{ \value{
options for the ENPY algorithm. options for the ENPY algorithm.
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
\alias{mest_options} \alias{mest_options}
\title{Options for the M-estimation Algorithm} \title{Options for the M-estimation Algorithm}
\usage{ \usage{
mest_options(max_it = 200, eps = 1e-08) mest_options(max_it = 200, eps = 1e-06)
} }
\arguments{ \arguments{
\item{max_it}{maximum number of iterations.} \item{max_it}{maximum number of iterations.}
......
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
\usage{ \usage{
pense(x, y, alpha, lambdas, cold_lambdas, penalty_loadings, pense(x, y, alpha, lambdas, cold_lambdas, penalty_loadings,
additional_initial_estimates, include_intercept = TRUE, max_it = 200, additional_initial_estimates, include_intercept = TRUE, max_it = 200,
eps = 1e-05, explore_it = 10, en_algorithm_opts, eps = 1e-06, explore_it = 10, tightening = c("none", "adaptive",
"exponential"), tightening_steps = 10L, en_algorithm_opts,
mest_opts = mest_options(), enpy_opts = enpy_options()) mest_opts = mest_options(), enpy_opts = enpy_options())
} }
\arguments{ \arguments{
......
...@@ -4,7 +4,9 @@ ...@@ -4,7 +4,9 @@
\alias{s_algo_options} \alias{s_algo_options}
\title{Options for the S-Estimate Algorithm} \title{Options for the S-Estimate Algorithm}
\usage{ \usage{
s_algo_options(explore_it = 10, max_it = 500, eps = 1e-08) s_algo_options(explore_it = 10, max_it = 500, eps = 1e-06,
tightening = c("none", "adaptive", "exponential"),
tightening_steps = 10)
} }
\arguments{ \arguments{
\item{explore_it}{number of iterations to explore potential candidate \item{explore_it}{number of iterations to explore potential candidate
......
CXX_STD = CXX11 CXX_STD = CXX11
PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) # -flto=thin PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) # -flto=thin
PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DNSOPTIM_DETAILED_METRICS -DTESTTHAT_DISABLED # PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DNSOPTIM_DETAILED_METRICS -DTESTTHAT_DISABLED
PKG_CXXFLAGS= -fstrict-aliasing -Wstrict-aliasing PKG_CXXFLAGS= -fstrict-aliasing -Wstrict-aliasing
PKG_OPTFLAGS= -g -O0 PKG_OPTFLAGS= -g -Os
# -DNSOPTIM_METRICS_DISABLED -DNSOPTIM_DETAILED_METRICS # -DNSOPTIM_METRICS_DISABLED -DNSOPTIM_DETAILED_METRICS
# PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DTESTTHAT_DISABLED PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DTESTTHAT_DISABLED
...@@ -9,12 +9,12 @@ ...@@ -9,12 +9,12 @@
#include <algorithm> #include <algorithm>
#include <nsoptim.hpp> #include <nsoptim.hpp>
#include "constants.hpp"
#include "rcpp_utils.hpp" #include "rcpp_utils.hpp"
#include "enpy_initest.hpp" #include "enpy_initest.hpp"
namespace { namespace {
constexpr int kDefaultCfgMaxIt = 1; //!< Maximum number of iterations. constexpr int kDefaultCfgMaxIt = 1; //!< Maximum number of iterations.
constexpr double kDefaultCfgEps = 1e-9; //!< Numerical tolerance level to determine convergence.
constexpr double kKeepPscProportion = 0.5; //!< Proportion of observations to keep based on PSCs. constexpr double kKeepPscProportion = 0.5; //!< Proportion of observations to keep based on PSCs.
constexpr bool kUseResidualThreshold = false; //!< Use a fixed threshold instead of a proportion constexpr bool kUseResidualThreshold = false; //!< Use a fixed threshold instead of a proportion
//!< to screen observations based on their //!< to screen observations based on their
...@@ -23,6 +23,8 @@ constexpr double kKeepResidualsProportion = 0.5; //!< Proportion of observation ...@@ -23,6 +23,8 @@ constexpr double kKeepResidualsProportion = 0.5; //!< Proportion of observation
//!< on the residuals. //!< on the residuals.
constexpr double kKeepResidualsThreshold = 2; //!< Fixed threshold to keep observations based constexpr double kKeepResidualsThreshold = 2; //!< Fixed threshold to keep observations based
//!< on the residuals. //!< on the residuals.
constexpr double kRetainBestFactor = 1.1; //!< Retain not only the candidates from the last iteration,
//!< but also those that are within this factor of the best candidate.
inline arma::uword HashUpdate(const arma::uword hash, const arma::uword value) noexcept; inline arma::uword HashUpdate(const arma::uword hash, const arma::uword value) noexcept;
} // namespace } // namespace
...@@ -32,11 +34,12 @@ namespace enpy_initest_internal { ...@@ -32,11 +34,12 @@ namespace enpy_initest_internal {
PyConfiguration ParseConfiguration(const Rcpp::List& config) noexcept { PyConfiguration ParseConfiguration(const Rcpp::List& config) noexcept {
return PyConfiguration{ return PyConfiguration{
GetFallback(config, "max_it", kDefaultCfgMaxIt), GetFallback(config, "max_it", kDefaultCfgMaxIt),
GetFallback(config, "eps", kDefaultCfgEps), GetFallback(config, "eps", kDefaultConvergenceTolerance),
GetFallback(config, "keep_psc_proportion", kKeepPscProportion), GetFallback(config, "keep_psc_proportion", kKeepPscProportion),
GetFallback(config, "use_residual_threshold", kUseResidualThreshold), GetFallback(config, "use_residual_threshold", kUseResidualThreshold),
GetFallback(config, "keep_residuals_proportion", kKeepResidualsProportion), GetFallback(config, "keep_residuals_proportion", kKeepResidualsProportion),
GetFallback(config, "keep_residuals_threshold", kKeepResidualsThreshold) GetFallback(config, "keep_residuals_threshold", kKeepResidualsThreshold),
GetFallback(config, "retain_best_factor", kRetainBestFactor)
}; };
} }
......
This diff is collapsed.
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "r_robust_utils.hpp" #include "r_robust_utils.hpp"
#include "constants.hpp"
#include "rcpp_integration.hpp" #include "rcpp_integration.hpp"
#include "r_interface_utils.hpp" #include "r_interface_utils.hpp"
#include "alias.hpp" #include "alias.hpp"
...@@ -38,7 +39,6 @@ MLocationScaleEstimate GenericMLocationScale(const arma::vec& x, const Mscale<T> ...@@ -38,7 +39,6 @@ MLocationScaleEstimate GenericMLocationScale(const arma::vec& x, const Mscale<T>
} }
} }
constexpr double kDefaultMLocationEps = 1e-8;
constexpr int kDefaultMLocationMaxIt = 100; constexpr int kDefaultMLocationMaxIt = 100;
} // namespace } // namespace
...@@ -85,7 +85,7 @@ SEXP MLocation(SEXP r_x, SEXP r_scale, SEXP r_opts) noexcept { ...@@ -85,7 +85,7 @@ SEXP MLocation(SEXP r_x, SEXP r_scale, SEXP r_opts) noexcept {
auto opts = as<Rcpp::List>(r_opts); auto opts = as<Rcpp::List>(r_opts);
double const * const scale = REAL(r_scale); double const * const scale = REAL(r_scale);
const int max_it = GetFallback(opts, "max_it", kDefaultMLocationMaxIt); const int max_it = GetFallback(opts, "max_it", kDefaultMLocationMaxIt);
const double convergence_tol = GetFallback(opts, "eps", kDefaultMLocationEps); const double convergence_tol = GetFallback(opts, "eps", kDefaultConvergenceTolerance);
switch (static_cast<RhoFunctionType>(GetFallback(opts, "rho", static_cast<int>(RhoFunctionType::kRhoBisquare)))) { switch (static_cast<RhoFunctionType>(GetFallback(opts, "rho", static_cast<int>(RhoFunctionType::kRhoBisquare)))) {
case RhoFunctionType::kRhoHuber: case RhoFunctionType::kRhoHuber:
......
...@@ -26,8 +26,6 @@ constexpr double kMadScaleConsistencyConstant = 1.4826; ...@@ -26,8 +26,6 @@ constexpr double kMadScaleConsistencyConstant = 1.4826;
constexpr double kDefaultMscaleDelta = 0.5; constexpr double kDefaultMscaleDelta = 0.5;
//! Default number of iterations for the M-scale algorithm. //! Default number of iterations for the M-scale algorithm.
constexpr int kDefaultMscaleMaxIt = 100; constexpr int kDefaultMscaleMaxIt = 100;
//! Default numeric tolerance for the M-scale algorithm.
constexpr double kDefaultMscaleEps = 1e-7;
template <typename T> template <typename T>
struct DefaultMscaleConstant { struct DefaultMscaleConstant {
...@@ -71,7 +69,7 @@ class Mscale { ...@@ -71,7 +69,7 @@ class Mscale {
: rho_(GetFallback(user_options, "cc", robust_scale_location::DefaultMscaleConstant<RhoFunction>::value)), : rho_(GetFallback(user_options, "cc", robust_scale_location::DefaultMscaleConstant<RhoFunction>::value)),
delta_(GetFallback(user_options, "delta", robust_scale_location::kDefaultMscaleDelta)), delta_(GetFallback(user_options, "delta", robust_scale_location::kDefaultMscaleDelta)),
max_it_(GetFallback(user_options, "max_it", robust_scale_location::kDefaultMscaleMaxIt)), max_it_(GetFallback(user_options, "max_it", robust_scale_location::kDefaultMscaleMaxIt)),
eps_(GetFallback(user_options, "eps", robust_scale_location::kDefaultMscaleEps)), eps_(GetFallback(user_options, "eps", kDefaultConvergenceTolerance)),
scale_(-1) {} scale_(-1) {}
//! Construct the M-scale function. //! Construct the M-scale function.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment