Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
pense
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
David Kepplinger
pense
Commits
49819291
Commit
49819291
authored
Aug 07, 2019
by
davidkep
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Retain "good" solutions along the ENPY iterations.
parent
ff5a2043
Changes
16
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
140 additions
and
96 deletions
+140
-96
NAMESPACE
NAMESPACE
+0
-1
control_options.R
R/control_options.R
+14
-10
enpy.R
R/enpy.R
+2
-5
pense_regression.R
R/pense_regression.R
+1
-1
en_admm_options.Rd
man/en_admm_options.Rd
+1
-1
en_dal_options.Rd
man/en_dal_options.Rd
+1
-1
enpy_initial_estimates.Rd
man/enpy_initial_estimates.Rd
+2
-2
enpy_options.Rd
man/enpy_options.Rd
+9
-5
mest_options.Rd
man/mest_options.Rd
+1
-1
pense.Rd
man/pense.Rd
+2
-1
s_algo_options.Rd
man/s_algo_options.Rd
+3
-1
Makevars
src/Makevars
+3
-3
enpy_initest.cc
src/enpy_initest.cc
+6
-3
enpy_initest.hpp
src/enpy_initest.hpp
+92
-56
r_robust_utils.cc
src/r_robust_utils.cc
+2
-2
robust_scale_location.hpp
src/robust_scale_location.hpp
+1
-3
No files found.
NAMESPACE
View file @
49819291
...
...
@@ -13,7 +13,6 @@ export(mlocscale)
export(mscale)
export(pense)
export(rho_function)
export(s_algo_options)
export(tau_size)
importFrom(Rcpp,evalCpp)
importFrom(stats,mad)
...
...
R/control_options.R
View file @
49819291
...
...
@@ -4,23 +4,27 @@
#' @param max_it maximum number of PY iterations.
#' @param eps numerical tolerance to check for convergence.
#' @param keep_psc_proportion how many observations should be kept based on the Principal Sensitivy Components.
#' @param keep_residuals_measure how to determine
how many
observations to keep, based on their residuals.
#' @param keep_residuals_measure how to determine
what
observations to keep, based on their residuals.
#' If `proportion`, a fixed number of observations is kept, while if `threshold`,
#' only observations with residuals below
a
threshold are kept.
#' only observations with residuals below
the
threshold are kept.
#' @param keep_residuals_proportion how many observations should be kept based on their residuals.
#' @param keep_residuals_threshold only observations with (standardized) residuals less than this threshold are kept.
#' @param retain_best_factor in addition to the candidates from the last iteration, also keep candidates
#' that are within this factor of the best candidate.
#'
#' @return options for the ENPY algorithm.
#' @export
enpy_options
<-
function
(
max_it
=
10
,
eps
=
1e-9
,
keep_psc_proportion
=
0.5
,
keep_residuals_measure
=
c
(
'proportion'
,
'threshold'
),
keep_residuals_proportion
=
0.5
,
keep_residuals_threshold
=
2
)
{
enpy_options
<-
function
(
max_it
=
10
,
eps
=
1e-6
,
keep_psc_proportion
=
0.5
,
keep_residuals_measure
=
c
(
'threshold'
,
'proportion'
),
keep_residuals_proportion
=
0.5
,
keep_residuals_threshold
=
2
,
retain_best_factor
=
1.1
)
{
list
(
max_it
=
as.integer
(
max_it
[[
1L
]]),
eps
=
as.numeric
(
eps
[[
1L
]]),
keep_psc_proportion
=
as.numeric
(
keep_psc_proportion
[[
1L
]]),
use_residual_threshold
=
match.arg
(
keep_residuals_measure
)
==
'threshold'
,
keep_residuals_proportion
=
as.numeric
(
keep_residuals_proportion
[[
1L
]]),
keep_residuals_threshold
=
as.numeric
(
keep_residuals_threshold
[[
1L
]]))
keep_residuals_threshold
=
as.numeric
(
keep_residuals_threshold
[[
1L
]]),
retain_best_factor
=
as.numeric
(
retain_best_factor
[[
1L
]]))
}
#' Options for the M-estimation Algorithm
...
...
@@ -30,7 +34,7 @@ enpy_options <- function (max_it = 10, eps = 1e-9, keep_psc_proportion = 0.5,
#'
#' @return options for the M-estimation algorithm.
#' @export
mest_options
<-
function
(
max_it
=
200
,
eps
=
1e-
8
)
{
mest_options
<-
function
(
max_it
=
200
,
eps
=
1e-
6
)
{
list
(
max_it
=
as.integer
(
max_it
[[
1L
]]),
eps
=
as.numeric
(
eps
[[
1L
]]))
}
...
...
@@ -43,7 +47,7 @@ mest_options <- function (max_it = 200, eps = 1e-8) {
#' @param eps numerical tolerance to check for convergence.
#'
#' @return options for the S-Estimate algorithm.
s_algo_options
<-
function
(
explore_it
=
10
,
max_it
=
500
,
eps
=
1e-
8
,
s_algo_options
<-
function
(
explore_it
=
10
,
max_it
=
500
,
eps
=
1e-
6
,
tightening
=
c
(
'none'
,
'adaptive'
,
'exponential'
),
tightening_steps
=
10
)
{
list
(
max_it
=
as.integer
(
max_it
[[
1L
]]),
...
...
@@ -80,7 +84,7 @@ s_algo_options <- function (explore_it = 10, max_it = 500, eps = 1e-8,
#' @return options for the ADMM EN algorithm.
#' @family EN algorithms
#' @export
en_admm_options
<-
function
(
max_it
=
1000
,
eps
=
1e-
6
,
tau
,
sparse
=
FALSE
,
en_admm_options
<-
function
(
max_it
=
1000
,
eps
=
1e-
9
,
tau
,
sparse
=
FALSE
,
admm_type
=
c
(
'auto'
,
'linearized'
,
'var-stepsize'
),
tau_lower_mult
=
0.01
,
tau_adjustment_lower
=
0.98
,
...
...
@@ -112,7 +116,7 @@ en_admm_options <- function (max_it = 1000, eps = 1e-6, tau, sparse = FALSE,
#' @return options for the DAL EN algorithm.
#' @family EN algorithms
#' @export
en_dal_options
<-
function
(
max_it
=
100
,
max_inner_it
=
100
,
eps
=
1e-
9
,
eta_multiplier
=
2
,
en_dal_options
<-
function
(
max_it
=
100
,
max_inner_it
=
100
,
eps
=
1e-
6
,
eta_multiplier
=
2
,
eta_start_conservative
=
0.01
,
eta_start_aggressive
=
1
,
lambda_relchange_aggressive
=
0.25
)
{
list
(
algorithm
=
'dal'
,
...
...
R/enpy.R
View file @
49819291
#' ENPY Initial Estimates
#'
#' @export
enpy_initial_estimates
<-
function
(
x
,
y
,
alpha
,
lambdas
,
bdp
=
0.25
,
cc
,
include_intercept
=
TRUE
,
enpy_opts
=
enpy_options
(),
mscale_maxit
=
200
,
mscale_eps
=
1e-8
,
en_options
)
{
enpy_initial_estimates
<-
function
(
x
,
y
,
alpha
,
lambdas
,
bdp
=
0.25
,
cc
,
include_intercept
=
TRUE
,
enpy_opts
=
enpy_options
(),
mscale_maxit
=
200
,
mscale_eps
=
1e-9
,
en_options
)
{
if
(
missing
(
cc
))
{
cc
<-
.bisquare_consistency_const
(
bdp
)
}
...
...
R/pense_regression.R
View file @
49819291
...
...
@@ -29,7 +29,7 @@
#' @export
pense
<-
function
(
x
,
y
,
alpha
,
lambdas
,
cold_lambdas
,
penalty_loadings
,
additional_initial_estimates
,
include_intercept
=
TRUE
,
max_it
=
200
,
eps
=
1e-
5
,
explore_it
=
10
,
max_it
=
200
,
eps
=
1e-
6
,
explore_it
=
10
,
tightening
=
c
(
'none'
,
'adaptive'
,
'exponential'
),
tightening_steps
=
10L
,
en_algorithm_opts
,
mest_opts
=
mest_options
(),
enpy_opts
=
enpy_options
())
{
...
...
man/en_admm_options.Rd
View file @
49819291
...
...
@@ -4,7 +4,7 @@
\alias{en_admm_options}
\title{Options for the ADMM Elastic Net Algorithm}
\usage{
en_admm_options(max_it = 1000, eps = 1e-0
6
, tau, sparse = FALSE,
en_admm_options(max_it = 1000, eps = 1e-0
9
, tau, sparse = FALSE,
admm_type = c("auto", "linearized", "var-stepsize"),
tau_lower_mult = 0.01, tau_adjustment_lower = 0.98,
tau_adjustment_upper = 0.999)
...
...
man/en_dal_options.Rd
View file @
49819291
...
...
@@ -4,7 +4,7 @@
\alias{en_dal_options}
\title{Options for the DAL Elastic Net Algorithm}
\usage{
en_dal_options(max_it = 100, max_inner_it = 100, eps = 1e-0
9
,
en_dal_options(max_it = 100, max_inner_it = 100, eps = 1e-0
6
,
eta_multiplier = 2, eta_start_conservative = 0.01,
eta_start_aggressive = 1, lambda_relchange_aggressive = 0.25)
}
...
...
man/enpy_initial_estimates.Rd
View file @
49819291
...
...
@@ -5,8 +5,8 @@
\title{ENPY Initial Estimates}
\usage{
enpy_initial_estimates(x, y, alpha, lambdas, bdp = 0.25, cc,
include_intercept = TRUE, enpy_opts = enpy_options(),
mscale_maxit,
mscale_
eps
, en_options)
include_intercept = TRUE, enpy_opts = enpy_options(),
mscale_
maxit = 200, mscale_eps = 1e-09
, en_options)
}
\description{
ENPY Initial Estimates
...
...
man/enpy_options.Rd
View file @
49819291
...
...
@@ -4,9 +4,10 @@
\alias{enpy_options}
\title{Options for the ENPY Algorithm}
\usage{
enpy_options(max_it = 10, eps = 1e-09, keep_psc_proportion = 0.5,
keep_residuals_measure = c("proportion", "threshold"),
keep_residuals_proportion = 0.5, keep_residuals_threshold = 2)
enpy_options(max_it = 10, eps = 1e-06, keep_psc_proportion = 0.5,
keep_residuals_measure = c("threshold", "proportion"),
keep_residuals_proportion = 0.5, keep_residuals_threshold = 2,
retain_best_factor = 1.1)
}
\arguments{
\item{max_it}{maximum number of PY iterations.}
...
...
@@ -15,13 +16,16 @@ enpy_options(max_it = 10, eps = 1e-09, keep_psc_proportion = 0.5,
\item{keep_psc_proportion}{how many observations should be kept based on the Principal Sensitivy Components.}
\item{keep_residuals_measure}{how to determine
how many
observations to keep, based on their residuals.
\item{keep_residuals_measure}{how to determine
what
observations to keep, based on their residuals.
If \code{proportion}, a fixed number of observations is kept, while if \code{threshold},
only observations with residuals below
a
threshold are kept.}
only observations with residuals below
the
threshold are kept.}
\item{keep_residuals_proportion}{how many observations should be kept based on their residuals.}
\item{keep_residuals_threshold}{only observations with (standardized) residuals less than this threshold are kept.}
\item{retain_best_factor}{in addition to the candidates from the last iteration, also keep candidates
that are within this factor of the best candidate.}
}
\value{
options for the ENPY algorithm.
...
...
man/mest_options.Rd
View file @
49819291
...
...
@@ -4,7 +4,7 @@
\alias{mest_options}
\title{Options for the M-estimation Algorithm}
\usage{
mest_options(max_it = 200, eps = 1e-0
8
)
mest_options(max_it = 200, eps = 1e-0
6
)
}
\arguments{
\item{max_it}{maximum number of iterations.}
...
...
man/pense.Rd
View file @
49819291
...
...
@@ -6,7 +6,8 @@
\usage{
pense(x, y, alpha, lambdas, cold_lambdas, penalty_loadings,
additional_initial_estimates, include_intercept = TRUE, max_it = 200,
eps = 1e-05, explore_it = 10, en_algorithm_opts,
eps = 1e-06, explore_it = 10, tightening = c("none", "adaptive",
"exponential"), tightening_steps = 10L, en_algorithm_opts,
mest_opts = mest_options(), enpy_opts = enpy_options())
}
\arguments{
...
...
man/s_algo_options.Rd
View file @
49819291
...
...
@@ -4,7 +4,9 @@
\alias{s_algo_options}
\title{Options for the S-Estimate Algorithm}
\usage{
s_algo_options(explore_it = 10, max_it = 500, eps = 1e-08)
s_algo_options(explore_it = 10, max_it = 500, eps = 1e-06,
tightening = c("none", "adaptive", "exponential"),
tightening_steps = 10)
}
\arguments{
\item{explore_it}{number of iterations to explore potential candidate
...
...
src/Makevars
View file @
49819291
CXX_STD = CXX11
PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) # -flto=thin
PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DNSOPTIM_DETAILED_METRICS -DTESTTHAT_DISABLED
#
PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DNSOPTIM_DETAILED_METRICS -DTESTTHAT_DISABLED
PKG_CXXFLAGS= -fstrict-aliasing -Wstrict-aliasing
PKG_OPTFLAGS= -g -O
0
PKG_OPTFLAGS= -g -O
s
# -DNSOPTIM_METRICS_DISABLED -DNSOPTIM_DETAILED_METRICS
#
PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DTESTTHAT_DISABLED
PKG_CPPFLAGS= -D__STDC_LIMIT_MACROS -DHAVE_RCPP -DTESTTHAT_DISABLED
src/enpy_initest.cc
View file @
49819291
...
...
@@ -9,12 +9,12 @@
#include <algorithm>
#include <nsoptim.hpp>
#include "constants.hpp"
#include "rcpp_utils.hpp"
#include "enpy_initest.hpp"
namespace
{
constexpr
int
kDefaultCfgMaxIt
=
1
;
//!< Maximum number of iterations.
constexpr
double
kDefaultCfgEps
=
1e-9
;
//!< Numerical tolerance level to determine convergence.
constexpr
double
kKeepPscProportion
=
0.5
;
//!< Proportion of observations to keep based on PSCs.
constexpr
bool
kUseResidualThreshold
=
false
;
//!< Use a fixed threshold instead of a proportion
//!< to screen observations based on their
...
...
@@ -23,6 +23,8 @@ constexpr double kKeepResidualsProportion = 0.5; //!< Proportion of observation
//!< on the residuals.
constexpr
double
kKeepResidualsThreshold
=
2
;
//!< Fixed threshold to keep observations based
//!< on the residuals.
constexpr
double
kRetainBestFactor
=
1.1
;
//!< Retain not only the candidates from the last iteration,
//!< but also those that are within this factor of the best candidate.
inline
arma
::
uword
HashUpdate
(
const
arma
::
uword
hash
,
const
arma
::
uword
value
)
noexcept
;
}
// namespace
...
...
@@ -32,11 +34,12 @@ namespace enpy_initest_internal {
PyConfiguration
ParseConfiguration
(
const
Rcpp
::
List
&
config
)
noexcept
{
return
PyConfiguration
{
GetFallback
(
config
,
"max_it"
,
kDefaultCfgMaxIt
),
GetFallback
(
config
,
"eps"
,
kDefaultC
fgEps
),
GetFallback
(
config
,
"eps"
,
kDefaultC
onvergenceTolerance
),
GetFallback
(
config
,
"keep_psc_proportion"
,
kKeepPscProportion
),
GetFallback
(
config
,
"use_residual_threshold"
,
kUseResidualThreshold
),
GetFallback
(
config
,
"keep_residuals_proportion"
,
kKeepResidualsProportion
),
GetFallback
(
config
,
"keep_residuals_threshold"
,
kKeepResidualsThreshold
)
GetFallback
(
config
,
"keep_residuals_threshold"
,
kKeepResidualsThreshold
),
GetFallback
(
config
,
"retain_best_factor"
,
kRetainBestFactor
)
};
}
...
...
src/enpy_initest.hpp
View file @
49819291
...
...
@@ -35,6 +35,8 @@ struct PyConfiguration {
//!< their residuals.
double
keep_residuals_proportion
;
//!< Proportion of observations to keep based on the residuals.
double
keep_residuals_threshold
;
//!< Fixed threshold to keep observations based on the residuals.
double
retain_best_factor
;
//!< Retain not only the candidates from the last iteration, but also those that are
//!< within this factor of the best candidate.
};
//! Parse an Rcpp::List into the PyConfiguration structure.
...
...
@@ -70,10 +72,12 @@ template<typename T>
class
IndexCompAbsoluteAscending
{
public:
//! Create a comparator using the given _values_ for comparision.
explicit
IndexCompAbsoluteAscending
(
const
T
&
values
)
noexcept
:
values_
(
arma
::
abs
(
values
))
{}
bool
operator
()(
const
arma
::
uword
a
,
const
arma
::
uword
b
)
{
return
std
::
abs
(
values_
[
a
])
<
std
::
abs
(
values_
[
b
]);
}
explicit
IndexCompAbsoluteAscending
(
const
T
&
values
)
noexcept
:
values_
(
values
)
{}
bool
operator
()(
const
arma
::
uword
a
,
const
arma
::
uword
b
)
const
{
return
std
::
abs
(
values_
[
a
])
<
std
::
abs
(
values_
[
b
]);
}
private:
const
arma
::
vec
values_
;
const
T
&
values_
;
};
//! Comparator class to sort an _index_ vector based on ascending values of the _values_ vector.
...
...
@@ -81,7 +85,7 @@ template<typename T>
class
IndexCompAscending
{
public:
explicit
IndexCompAscending
(
const
T
&
values
)
noexcept
:
values_
(
values
)
{}
bool
operator
()(
const
arma
::
uword
a
,
const
arma
::
uword
b
)
{
return
values_
[
a
]
<
values_
[
b
];
}
bool
operator
()(
const
arma
::
uword
a
,
const
arma
::
uword
b
)
const
{
return
values_
[
a
]
<
values_
[
b
];
}
private:
const
T
&
values_
;
};
...
...
@@ -91,7 +95,7 @@ template<typename T>
class
IndexCompDescending
{
public:
explicit
IndexCompDescending
(
const
T
&
values
)
noexcept
:
values_
(
values
)
{}
bool
operator
()(
const
arma
::
uword
a
,
const
arma
::
uword
b
)
{
return
values_
[
a
]
>
values_
[
b
];
}
bool
operator
()(
const
arma
::
uword
a
,
const
arma
::
uword
b
)
const
{
return
values_
[
a
]
>
values_
[
b
];
}
private:
const
T
&
values_
;
};
...
...
@@ -104,18 +108,17 @@ enum class SubsetEstimateResult {
//! Compute estimator & the objective function value on a subset of the given data.
//!
//! @param
full_data_loss the loss operating on the full
data set.
//! @param
filtered_data_loss the loss operating on the residuals-filtered data
.
//! @param
loss the LS loss operating on the **full**
data set.
//! @param
subset an index vector (with respect to the full data) of observations to consider
.
//! @param optim the optimizer to compute the estimate.
//! @param subset a subset of the index vector with indices to consider.
//! @param candidates list of candidates which will be updated.
//! @param insert_it iterator pointing to the position where to insert the new candidate. The iterator
//! will be updated to point to the newly inserted element.
//! @param subset_hashes hashes of all previously examined subsets. The hash of th
is
subset is added to the list.
//! @param subset_hashes hashes of all previously examined subsets. The hash of th
e new
subset is added to the list.
//! @return status of the minimum computed on a subset of the data.
template
<
typename
Optimizer
>
inline
SubsetEstimateResult
EstimateOnSubset
(
const
nsoptim
::
LsLoss
&
filtered_data_
loss
,
const
arma
::
subview_col
<
arma
::
uword
>
&
subset
,
Optimizer
*
optim
,
template
<
typename
Optimizer
,
typename
T
>
inline
SubsetEstimateResult
EstimateOnSubset
(
const
nsoptim
::
LsLoss
&
loss
,
const
T
&
subset
,
Optimizer
*
optim
,
alias
::
Optima
<
Optimizer
>*
candidates
,
typename
alias
::
Optima
<
Optimizer
>::
iterator
*
insert_it
,
std
::
unordered_set
<
arma
::
uword
>*
subset_hashes
);
...
...
@@ -214,10 +217,12 @@ PyResult<Optimizer> PYIterations(SLoss loss, const typename Optimizer::PenaltyFu
}
uvec
all_indices
=
regspace
<
uvec
>
(
0
,
data
.
n_obs
()
-
1
);
uvec
residuals_keep_ind
=
all_indices
;
HashSet
residual_filtered_data_hashes
{
HashSequence
(
all_indices
.
n_elem
-
1
)
};
HashSet
subset_candidate_hashes
{
HashSequence
(
all_indices
.
n_elem
-
1
)
};
auto
best_candidate_it
=
py_result
.
initial_estimates
.
begin
();
std
::
unique_ptr
<
nsoptim
::
LsLoss
>
filtered_data_loss
(
new
nsoptim
::
LsLoss
(
loss
.
SharedData
(),
loss
.
IncludeIntercept
()
));
PscResult
<
Optimizer
>
psc_result
(
Optimum
(
*
filtered_data
_loss
,
penalty
));
nsoptim
::
LsLoss
ls_loss
(
loss
.
SharedData
(),
loss
.
IncludeIntercept
(
));
PscResult
<
Optimizer
>
psc_result
(
Optimum
(
ls
_loss
,
penalty
));
arma
::
mat
const
*
pscs
=
&
full_psc_result
.
pscs
;
// The initial "best candidate" comes from the PSC and thus has the wrong objective function value.
...
...
@@ -228,25 +233,26 @@ PyResult<Optimizer> PYIterations(SLoss loss, const typename Optimizer::PenaltyFu
// Start the PY iterations.
int
iter
=
0
;
decltype
(
best_candidate_it
)
insert_candidate_it
;
while
(
true
)
{
HashSet
candidate_hashes
;
// Based on each PSC, "thin-out" the data in 3 different ways and compute the classical estimator
const
uword
psc_index_shift
=
std
::
max
<
uword
>
(
pyconfig
.
keep_psc_proportion
*
filtered_data_loss
->
data
().
n_obs
(),
kMinObs
);
uvec
index_vec
=
arma
::
regspace
<
uvec
>
(
0
,
pscs
->
n_rows
-
1
);
iter_metrics
->
AddDetail
(
"n_obs_psc_filtered"
,
static_cast
<
int
>
(
psc_index_shift
));
const
uword
subset_size
=
std
::
max
<
uword
>
(
pyconfig
.
keep_psc_proportion
*
residuals_keep_ind
.
n_elem
,
kMinObs
);
uvec
subset_indices
=
arma
::
regspace
<
uvec
>
(
0
,
residuals_keep_ind
.
n_elem
-
1
);
// uvec subset_
index_vec = arma::regspace<uvec>(0, pscs->n_rows - 1);
iter_metrics
->
AddDetail
(
"n_obs_psc_filtered"
,
static_cast
<
int
>
(
subset_size
));
auto
insert_candidate_it
=
best_candidate_it
;
insert_candidate_it
=
best_candidate_it
;
for
(
uword
psc_col
=
0
;
psc_col
<
pscs
->
n_cols
;
++
psc_col
)
{
auto
&
psc_metric
=
iter_metrics
->
CreateSubMetrics
(
"psc_subset"
);
// Sort indices according to ascending absolute values of the PSC
std
::
partial_sort
(
index_vec
.
begin
(),
index_vec
.
begin
()
+
psc_index_shift
,
index_vec
.
end
(),
IndexCompAbsoluteAscending
<
subview_vec
>
(
pscs
->
col
(
psc_col
)));
const
auto
estimate_result_absasc
=
EstimateOnSubset
(
*
filtered_data_loss
,
index_vec
.
head
(
psc_index_shift
),
std
::
partial_sort
(
subset_indices
.
begin
(),
subset_indices
.
begin
()
+
subset_size
,
subset_indices
.
end
(),
IndexCompAbsoluteAscending
<
subview_vec
>
(
pscs
->
col
(
psc_col
)));
const
auto
estimate_result_absasc
=
EstimateOnSubset
(
ls_loss
,
residuals_keep_ind
.
rows
(
subset_indices
.
head
(
subset_size
)),
&
pyinit_optim
,
&
py_result
.
initial_estimates
,
&
insert_candidate_it
,
&
candidate_hashes
);
&
insert_candidate_it
,
&
subset_
candidate_hashes
);
psc_metric
.
AddDetail
(
"absasc_result"
,
static_cast
<
int
>
(
estimate_result_absasc
));
if
(
insert_candidate_it
->
metrics
)
{
...
...
@@ -255,12 +261,12 @@ PyResult<Optimizer> PYIterations(SLoss loss, const typename Optimizer::PenaltyFu
}
// Sort indices according to ascending values of the PSC
std
::
partial_sort
(
index_vec
.
begin
(),
index_vec
.
begin
()
+
psc_index_shift
,
index_vec
.
end
()
,
IndexCompAscending
<
subview_vec
>
(
pscs
->
col
(
psc_col
)));
const
auto
estimate_result_asc
=
EstimateOnSubset
(
*
filtered_data_loss
,
index_vec
.
head
(
psc_index_shift
),
std
::
partial_sort
(
subset_indices
.
begin
(),
subset_indices
.
begin
()
+
subset_size
,
subset_indices
.
end
(),
IndexCompAscending
<
subview_vec
>
(
pscs
->
col
(
psc_col
)));
const
auto
estimate_result_asc
=
EstimateOnSubset
(
ls_loss
,
residuals_keep_ind
.
rows
(
subset_indices
.
head
(
subset_size
)
),
&
pyinit_optim
,
&
py_result
.
initial_estimates
,
&
insert_candidate_it
,
&
candidate_hashes
);
&
insert_candidate_it
,
&
subset_
candidate_hashes
);
psc_metric
.
AddDetail
(
"asc_result"
,
static_cast
<
int
>
(
estimate_result_asc
));
if
(
insert_candidate_it
->
metrics
)
{
...
...
@@ -269,12 +275,12 @@ PyResult<Optimizer> PYIterations(SLoss loss, const typename Optimizer::PenaltyFu
}
// Sort indices according to descending values of the PSC
std
::
partial_sort
(
index_vec
.
begin
(),
index_vec
.
begin
()
+
psc_index_shift
,
index_vec
.
end
()
,
IndexCompDescending
<
subview_vec
>
(
pscs
->
col
(
psc_col
)));
const
auto
estimate_result_desc
=
EstimateOnSubset
(
*
filtered_data_loss
,
index_vec
.
head
(
psc_index_shift
),
std
::
partial_sort
(
subset_indices
.
begin
(),
subset_indices
.
begin
()
+
subset_size
,
subset_indices
.
end
(),
IndexCompDescending
<
subview_vec
>
(
pscs
->
col
(
psc_col
)));
const
auto
estimate_result_desc
=
EstimateOnSubset
(
ls_loss
,
residuals_keep_ind
.
rows
(
subset_indices
.
head
(
subset_size
)
),
&
pyinit_optim
,
&
py_result
.
initial_estimates
,
&
insert_candidate_it
,
&
candidate_hashes
);
&
insert_candidate_it
,
&
subset_
candidate_hashes
);
psc_metric
.
AddDetail
(
"desc_result"
,
static_cast
<
int
>
(
estimate_result_desc
));
if
(
insert_candidate_it
->
metrics
)
{
...
...
@@ -293,11 +299,11 @@ PyResult<Optimizer> PYIterations(SLoss loss, const typename Optimizer::PenaltyFu
arma
::
vec
best_candidate_residuals
;
double
best_candidate_mscale
;
// Skip the first element since this is the `best_candidate_it` and already evaluated and iterate just after the
// last inserted element.
++
insert_candidate_it
;
for
(
auto
cand_it
=
++
py_result
.
initial_estimates
.
begin
();
cand_it
!=
insert_candidate_it
;
++
cand_it
)
{
auto
end_check_candidate_it
=
insert_candidate_it
;
++
end_check_candidate_it
;
for
(
auto
cand_it
=
++
py_result
.
initial_estimates
.
begin
();
cand_it
!=
end_check_candidate_it
;
++
cand_it
)
{
// Compute the S-loss of the candidate
const
arma
::
vec
candidate_residuals
=
data
.
cy
()
-
data
.
cx
()
*
cand_it
->
coefs
.
beta
-
cand_it
->
coefs
.
intercept
;
const
auto
candidate_eval
=
loss
.
EvaluateResiduals
(
candidate_residuals
);
...
...
@@ -332,33 +338,39 @@ PyResult<Optimizer> PYIterations(SLoss loss, const typename Optimizer::PenaltyFu
iter_metrics
=
&
py_result
.
metrics
.
CreateSubMetrics
(
"py_iteration"
);
// Move the new best candidate to the front of the list.
std
::
swap_ranges
(
new_best_candidate_it
,
++
new_best_candidate_it
,
py_result
.
initial_estimates
.
begin
());
if
(
insert_candidate_it
!=
new_best_candidate_it
)
{
std
::
iter_swap
(
new_best_candidate_it
,
py_result
.
initial_estimates
.
begin
());
best_candidate_it
=
py_result
.
initial_estimates
.
begin
();
insert_candidate_it
=
best_candidate_it
;
}
else
{
// If the last inserted element is the new best candidate, we need to ensure that `insert_candidate_it`
// still points to the last element for this iteration after the swap!
insert_candidate_it
=
py_result
.
initial_estimates
.
begin
();
std
::
iter_swap
(
new_best_candidate_it
,
insert_candidate_it
);
best_candidate_it
=
py_result
.
initial_estimates
.
begin
();
}
// Use the new best solution to thin-out the data based on the residuals and compute new PSCs.
const
uvec
keep_ind
=
GetResidualKeepIndices
(
best_candidate_residuals
,
best_candidate_mscale
,
pyconfig
,
residuals_
keep_ind
=
GetResidualKeepIndices
(
best_candidate_residuals
,
best_candidate_mscale
,
pyconfig
,
&
all_indices
);
// Store the hashed index vector to avoid unnecessary iterations.
const
auto
hash_insert
=
residual_filtered_data_hashes
.
insert
(
HashIndexVector
(
keep_ind
));
const
auto
hash_insert
=
residual_filtered_data_hashes
.
insert
(
HashIndexVector
(
residuals_
keep_ind
));
if
(
!
hash_insert
.
second
)
{
// The resulting subset of observations was already used before.
// Therefore, the next iteration would result in the exact same solution and we can
// stop the iterations early.
// Therefore, the next iteration would result in duplicate solutions and we can stop.
iter_metrics
->
AddMetric
(
"stop"
,
"filtered residuals are duplicates"
);
break
;
}
// This subset of observations was not yet considered. Continue.
filtered_data_loss
.
reset
(
new
nsoptim
::
LsLoss
(
std
::
make_shared
<
PredictorResponseData
>
(
data
.
Observations
(
keep_ind
)),
loss
.
IncludeIntercept
()
));
nsoptim
::
LsLoss
filtered_ls_loss
(
std
::
make_shared
<
PredictorResponseData
>
(
data
.
Observations
(
residuals_
keep_ind
)),
loss
.
IncludeIntercept
(
));
pyinit_optim
.
loss
(
*
filtered_data
_loss
);
iter_metrics
->
AddDetail
(
"n_obs"
,
static_cast
<
int
>
(
keep_ind
.
n_elem
));
pyinit_optim
.
loss
(
filtered_ls
_loss
);
iter_metrics
->
AddDetail
(
"n_obs"
,
static_cast
<
int
>
(
residuals_
keep_ind
.
n_elem
));
// Compute the optimzer and the PSCs on the residual-filtered data
psc_result
=
PrincipalSensitiviyComponents
(
*
filtered_data
_loss
,
pyinit_optim
);
psc_result
=
PrincipalSensitiviyComponents
(
filtered_ls
_loss
,
pyinit_optim
);
if
(
psc_result
.
optimum
.
metrics
)
{
iter_metrics
->
AddSubMetrics
(
std
::
move
(
*
psc_result
.
optimum
.
metrics
));
...
...
@@ -396,13 +408,37 @@ PyResult<Optimizer> PYIterations(SLoss loss, const typename Optimizer::PenaltyFu
// We continue the iterations, so remove all but the best estimate.
// py_result.initial_estimates.resize(1, *best_candidate_it);
pscs
=
&
psc_result
.
pscs
;
insert_candidate_it
=
best_candidate_it
;
}
// Only retain the estimates from the last iteration plus the estimates that are within 10% of the best solution
// (up to at most the number of estimates from the last iteration).
// `insert_candidate_it` points to the last element inserted in the last iteration.
if
(
pyconfig
.
retain_best_factor
>
1
)
{
const
double
objf_value_cutoff
=
pyconfig
.
retain_best_factor
*
best_candidate_it
->
objf_value
;
while
(
true
)
{
const
auto
current_end
=
py_result
.
initial_estimates
.
end
();
if
(
insert_candidate_it
==
current_end
)
{
break
;
}
auto
tmp_it
=
insert_candidate_it
;
// Search for either the last element or the next element that is "good enough".
int
removing
=
0
;
while
(
++
tmp_it
!=
current_end
&&
tmp_it
->
objf_value
>
objf_value_cutoff
)
{
++
removing
;
}
if
(
removing
>
0
)
{
py_result
.
initial_estimates
.
erase_after
(
insert_candidate_it
,
tmp_it
);
}
insert_candidate_it
=
tmp_it
;
}
}
return
py_result
;
}
template
<
typename
Optimizer
>
inline
SubsetEstimateResult
EstimateOnSubset
(
const
nsoptim
::
LsLoss
&
filtered_data_
loss
,
const
arma
::
subview_col
<
arma
::
uword
>
&
subset
,
Optimizer
*
optim
,
template
<
typename
Optimizer
,
typename
T
>
inline
SubsetEstimateResult
EstimateOnSubset
(
const
nsoptim
::
LsLoss
&
loss
,
const
T
&
subset
,
Optimizer
*
optim
,
alias
::
Optima
<
Optimizer
>*
candidates
,
typename
alias
::
Optima
<
Optimizer
>::
iterator
*
insert_it
,
std
::
unordered_set
<
arma
::
uword
>*
subset_hashes
)
{
...
...
@@ -413,8 +449,8 @@ inline SubsetEstimateResult EstimateOnSubset(const nsoptim::LsLoss& filtered_dat
if
(
subset_hashes
->
find
(
siv_hash
)
==
subset_hashes
->
end
())
{
// The indices are unique. Compute estimator and store hash to avoid this subset in the future.
optim
->
loss
(
nsoptim
::
LsLoss
(
std
::
make_shared
<
nsoptim
::
PredictorResponseData
>
(
filtered_data_
loss
.
data
().
Observations
(
sorted_index_vec
)),
filtered_data_
loss
.
IncludeIntercept
()));
std
::
make_shared
<
nsoptim
::
PredictorResponseData
>
(
loss
.
data
().
Observations
(
sorted_index_vec
)),
loss
.
IncludeIntercept
()));
try
{
// We don't want to visit this subset ever again. Even if there's an error, we assume it's the subset's fault.
subset_hashes
->
insert
(
siv_hash
);
...
...
src/r_robust_utils.cc
View file @
49819291
...
...
@@ -8,6 +8,7 @@
#include "r_robust_utils.hpp"
#include "constants.hpp"
#include "rcpp_integration.hpp"
#include "r_interface_utils.hpp"
#include "alias.hpp"
...
...
@@ -38,7 +39,6 @@ MLocationScaleEstimate GenericMLocationScale(const arma::vec& x, const Mscale<T>
}
}
constexpr
double
kDefaultMLocationEps
=
1e-8
;
constexpr
int
kDefaultMLocationMaxIt
=
100
;
}
// namespace
...
...
@@ -85,7 +85,7 @@ SEXP MLocation(SEXP r_x, SEXP r_scale, SEXP r_opts) noexcept {
auto
opts
=
as
<
Rcpp
::
List
>
(
r_opts
);
double
const
*
const
scale
=
REAL
(
r_scale
);
const
int
max_it
=
GetFallback
(
opts
,
"max_it"
,
kDefaultMLocationMaxIt
);
const
double
convergence_tol
=
GetFallback
(
opts
,
"eps"
,
kDefault
MLocationEps
);
const
double
convergence_tol
=
GetFallback
(
opts
,
"eps"
,
kDefault
ConvergenceTolerance
);
switch
(
static_cast
<
RhoFunctionType
>
(
GetFallback
(
opts
,
"rho"
,
static_cast
<
int
>
(
RhoFunctionType
::
kRhoBisquare
))))
{
case
RhoFunctionType
:
:
kRhoHuber
:
...
...
src/robust_scale_location.hpp
View file @
49819291
...
...
@@ -26,8 +26,6 @@ constexpr double kMadScaleConsistencyConstant = 1.4826;
constexpr
double
kDefaultMscaleDelta
=
0.5
;
//! Default number of iterations for the M-scale algorithm.
constexpr
int
kDefaultMscaleMaxIt
=
100
;
//! Default numeric tolerance for the M-scale algorithm.
constexpr
double
kDefaultMscaleEps
=
1e-7
;
template
<
typename
T
>
struct
DefaultMscaleConstant
{
...
...
@@ -71,7 +69,7 @@ class Mscale {
:
rho_
(
GetFallback
(
user_options
,
"cc"
,
robust_scale_location
::
DefaultMscaleConstant
<
RhoFunction
>::
value
)),
delta_
(
GetFallback
(
user_options
,
"delta"
,
robust_scale_location
::
kDefaultMscaleDelta
)),
max_it_
(
GetFallback
(
user_options
,
"max_it"
,
robust_scale_location
::
kDefaultMscaleMaxIt
)),
eps_
(
GetFallback
(
user_options
,
"eps"
,
robust_scale_location
::
kDefaultMscaleEps
)),
eps_
(
GetFallback
(
user_options
,
"eps"
,
kDefaultConvergenceTolerance
)),
scale_
(
-
1
)
{}
//! Construct the M-scale function.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment