Commit 546ce473 authored by davidkep's avatar davidkep

add pensem and more methods

parent afd345a0
......@@ -8,8 +8,14 @@ S3method(c,starting_point)
S3method(c,starting_points)
S3method(coef,pense_cvfit)
S3method(coef,pense_fit)
S3method(pensem_cv,default)
S3method(pensem_cv,pense_cvfit)
S3method(plot,pense_cvfit)
S3method(plot,pense_fit)
S3method(predict,pense_cvfit)
S3method(predict,pense_fit)
S3method(residuals,pense_cvfit)
S3method(residuals,pense_fit)
export(adamest_cv)
export(adapense_cv)
export(as_starting_point)
......@@ -34,6 +40,8 @@ export(mstep_options)
export(pense)
export(pense_cv)
export(pense_options)
export(pensem)
export(pensem_cv)
export(prinsens)
export(print.nsoptim_metrics)
export(regmest)
......@@ -41,7 +49,6 @@ export(regmest_cv)
export(rho_function)
export(starting_point)
export(tau_size)
importClassesFrom(Matrix,dsparseVector)
importFrom(Matrix,drop)
importFrom(Matrix,sparseMatrix)
importFrom(Matrix,sparseVector)
......
......@@ -5,19 +5,19 @@
#' @param object PENSE regularization path to extract coefficients from.
#' @param lambda a single value of the penalty parameter.
#' @param sparse should coefficients be returned as sparse or dense vectors? Defaults to the `sparse` argument
#' supplied to [pense()]. Can also be set to `sparse = 'matrix'`, in which case a sparse matrix
#' is returned instead of a sparse vector.
#' supplied to [pense()]. Can also be set to `sparse = 'matrix'`, in which case a sparse matrix
#' is returned instead of a sparse vector.
#' @param exact defunct. Always gives a warning if `lambda` is not part of the fitted sequence and hence coefficients
#' are interpolated.
#' are interpolated.
#' @param correction defunct.
#' @param ... currently not used.
#' @return either a numeric vector or a sparse vector of type [dsparseVector][Matrix::sparseVector-class]
#' of size \eqn{p + 1}, depending on the `sparse` argument.
#' Note: prior to version 2.0.0 sparse coefficients were returned as sparse matrix of type *dgCMatrix*.
#' To get a sparse matrix, use `sparse = 'matrix'`.
#' of size \eqn{p + 1}, depending on the `sparse` argument.
#' Note: prior to version 2.0.0 sparse coefficients were returned as sparse matrix of type *dgCMatrix*.
#' To get a sparse matrix, use `sparse = 'matrix'`.
#'
#' @seealso coef.cv_pensefit for extracting coefficients from a PENSE fit with hyper-parameters chosen by
#' cross-validation
#' cross-validation
#' @example examples/pense_fit.R
#' @export
#' @importFrom lifecycle deprecate_warn deprecated is_present
......@@ -34,18 +34,21 @@ coef.pense_fit <- function (object, lambda, sparse = NULL, exact = deprecated(),
warn("Only first element in `lambda` is used.")
}
cl <- match.call(expand.dots = TRUE)
concat <- !isFALSE(cl$concat)
lambda <- .as(lambda[[1L]], 'numeric')
if (isTRUE(lambda > object$lambda[[1L]]) && isTRUE(sum(abs(object$estimates[[1L]]$beta)) < .Machine$double.eps)) {
return(.concat_coefs(object$estimates[[1L]], object$call, sparse, parent.frame()))
return(.concat_coefs(object$estimates[[1L]], object$call, sparse, parent.frame(), concat))
}
lambda_match <- .approx_match(lambda, object$lambda)
if (is.na(lambda_match)) {
warn("Requested penalization level not part of the sequence. Returning interpolated coefficients.")
return(.interpolate_coefs(object, lambda, object$lambda, sparse = sparse, envir = parent.frame()))
return(.interpolate_coefs(object, lambda, object$lambda, sparse = sparse, envir = parent.frame(), concat = concat))
} else {
return(.concat_coefs(object$estimates[[lambda_match]], object$call, sparse, parent.frame()))
return(.concat_coefs(object$estimates[[lambda_match]], object$call, sparse, parent.frame(), concat))
}
}
......@@ -60,19 +63,19 @@ coef.pense_fit <- function (object, lambda, sparse = NULL, exact = deprecated(),
#'
#' @param object PENSE with cross-validated hyper-parameters to extract coefficients from.
#' @param lambda either a string specifying which penalty level to use or a single numeric value of the penalty
#' parameter. See details.
#' parameter. See details.
#' @param se_mult If `lambda = "se"`, the multiple of standard errors to tolerate.
#' @param sparse should coefficients be returned as sparse or dense vectors? Defaults to the `sparse` argument
#' supplied to [pense_cv()]. Can also be set to `sparse = 'matrix'`, in which case a sparse matrix
#' is returned instead of a sparse vector.
#' supplied to [pense_cv()]. Can also be set to `sparse = 'matrix'`, in which case a sparse matrix
#' is returned instead of a sparse vector.
#' @param exact deprecated. Always gives a warning if `lambda` is not part of the fitted sequence and coefficients
#' are interpolated.
#' are interpolated.
#' @param correction defunct.
#' @param ... currently not used.
#' @return either a numeric vector or a sparse vector of type [dsparseVector][Matrix::sparseVector-class]
#' of size \eqn{p + 1}, depending on the `sparse` argument.
#' Note: prior to version 2.0.0 sparse coefficients were returned as sparse matrix of type *dgCMatrix*.
#' To get a sparse matrix, use `sparse = 'matrix'`.
#' of size \eqn{p + 1}, depending on the `sparse` argument.
#' Note: prior to version 2.0.0 sparse coefficients were returned as sparse matrix of type *dgCMatrix*.
#' To get a sparse matrix, use `sparse = 'matrix'`.
#'
#' @example examples/pense_fit.R
#' @export
......@@ -88,12 +91,15 @@ coef.pense_cvfit <- function (object, lambda = c('min', 'se'), se_mult = 2, spar
lambda <- match.arg(lambda)
}
cl <- match.call(expand.dots = TRUE)
concat <- !isFALSE(cl$concat)
if (isFALSE(object$call$fit_all)) {
if (!isTRUE(lambda == 'min')) {
warn(paste("`object` was created with `fit_all = FALSE`. Only the estimate at the minimum is available and will",
"be returned."))
}
return(.concat_coefs(object$estimates[[1L]], object$call, sparse, parent.frame()))
return(.concat_coefs(object$estimates[[1L]], object$call, sparse, parent.frame(), concat))
}
lambda_seq <- object$cvres$lambda
......@@ -118,19 +124,18 @@ coef.pense_cvfit <- function (object, lambda = c('min', 'se'), se_mult = 2, spar
}
if (isTRUE(lambda > lambda_seq[[1L]]) && isTRUE(sum(abs(object$estimates[[1L]]$beta)) < .Machine$double.eps)) {
return(.concat_coefs(object$estimates[[1L]], object$call, sparse, parent.frame()))
return(.concat_coefs(object$estimates[[1L]], object$call, sparse, parent.frame(), concat))
}
lambda_match <- .approx_match(lambda, lambda_seq)
if (is.na(lambda_match)) {
warn("Requested penalization level not part of the sequence. Returning interpolated coefficients.")
return(.interpolate_coefs(object, lambda, lambda_seq, sparse = sparse, envir = parent.frame()))
return(.interpolate_coefs(object, lambda, lambda_seq, sparse = sparse, envir = parent.frame(), concat = concat))
} else {
return(.concat_coefs(object$estimates[[lambda_match]], object$call, sparse, parent.frame()))
return(.concat_coefs(object$estimates[[lambda_match]], object$call, sparse, parent.frame(), concat))
}
}
## Interpolate the coefficients at `lambda` using estimates from `object` at `lambda_seq`.
## @param ... passed on to `.concat_coefs()`
.interpolate_coefs <- function (object, lambda, lambda_seq, ...) {
......@@ -155,7 +160,11 @@ coef.pense_cvfit <- function (object, lambda = c('min', 'se'), se_mult = 2, spar
#' @importFrom methods is
#' @importFrom Matrix sparseVector sparseMatrix
.concat_coefs <- function (coef, call, sparse, envir) {
.concat_coefs <- function (coef, call, sparse, envir, concat = TRUE) {
if (!concat) {
return(coef)
}
# Determine names
var_names <- tryCatch(colnames(eval(call$x, envir = envir)), error = function (e) NULL)
if (is.null(var_names)) {
......
......@@ -9,32 +9,32 @@
#' high leverage. Among all the LS-EN estimates, the estimate with smallest M-scale of the residuals is selected.
#' Observations with largest residual for the selected estimate are removed and the next iteration is started.
#'
#'
#' @param max_it maximum number of EN-PY iterations.
#' @param en_algorithm_opts options for the LS-EN algorithm. See [en_algorithm_options] for details.
#' @param keep_psc_proportion how many observations to keep based on the Principal Sensitivity Components.
#' @param keep_residuals_measure how to determine what observations to keep, based on their residuals.
#' If `proportion`, a fixed number of observations is kept.
#' If `threshold`, only observations with residuals below the threshold are kept.
#' If `proportion`, a fixed number of observations is kept.
#' If `threshold`, only observations with residuals below the threshold are kept.
#' @param keep_residuals_proportion proportion of observations to keep based on their residuals.
#' @param keep_residuals_threshold only observations with (standardized) residuals less than this threshold are kept.
#' @param retain_best_factor only keep candidates that are within this factor of the best candidate. If `<= 1`, only
#' keep candidates from the last iteration.
#' keep candidates from the last iteration.
#' @param retain_max maximum number of candidates, i.e., only the best `retain_max` candidates are retained.
#'
#' @return options for the ENPY algorithm.
#' @export
enpy_options <- function (max_it = 10, keep_psc_proportion = 0.5,
en_algorithm_opts,
keep_residuals_measure = c('threshold', 'proportion'),
en_algorithm_opts, keep_residuals_measure = c('threshold', 'proportion'),
keep_residuals_proportion = 0.5, keep_residuals_threshold = 2,
retain_best_factor = 2) {
retain_best_factor = 2, retain_max = 500) {
list(max_it = .as(max_it[[1L]], 'integer'),
en_options = if (missing(en_algorithm_opts)) { NULL } else { en_algorithm_opts },
keep_psc_proportion = .as(keep_psc_proportion[[1L]], 'numeric'),
use_residual_threshold = match.arg(keep_residuals_measure) == 'threshold',
keep_residuals_proportion = .as(keep_residuals_proportion[[1L]], 'numeric'),
keep_residuals_threshold = .as(keep_residuals_threshold[[1L]], 'numeric'),
retain_best_factor = .as(retain_best_factor[[1L]], 'numeric'))
retain_best_factor = .as(retain_best_factor[[1L]], 'numeric'),
retain_max = .as(retain_max[[1L]], 'integer'))
}
#' Options for the M-scale Estimation Algorithm
......@@ -74,7 +74,7 @@ mscale_algorithm_options <- function (max_it = 200, eps = 1e-8) {
#' @param max_it maximum number of iterations.
#' @param tightening how to make inner iterations more precise as the algorithm approaches a local minimum.
#' @param tightening_steps for *adaptive* tightening strategy, how often to tighten until the desired tolerance
#' is attained.
#' is attained.
#' @param en_algorithm_opts options for the inner LS-EN algorithm. See [en_algorithm_options] for details.
#'
#' @return options for the MM algorithm.
......@@ -95,12 +95,12 @@ mm_algorithm_options <- function (max_it = 500, tightening = c('adaptive', 'expo
#' To select a specific algorithm and adjust the options, use any of the `en_***_options` functions.
#'
#' * [en_lars_options()]: Use the tuning-free LARS algorithm. This computes _exact_ (up to numerical errors) solutions
#' to the EN-LS problem. It is not iterative and therefore can not benefit from approximate
#' solutions, but in turn guarantees that a solution will be found.
#' to the EN-LS problem. It is not iterative and therefore can not benefit from approximate
#' solutions, but in turn guarantees that a solution will be found.
#' * [en_admm_options()]: Use an iterative ADMM-type algorithm which needs \eqn{O(n p)} operations per iteration and
#' converges sub-linearly.
#' converges sub-linearly.
#' * [en_dal_options()]: Use the iterative Dual Augmented Lagrangian (DAL) method. DAL needs \eqn{O(n^3 p^2)} operations
#' per iteration, but converges exponentially.
#' per iteration, but converges exponentially.
#'
#' @name en_algorithm_options
NULL
......@@ -137,12 +137,12 @@ en_admm_options <- function (max_it = 1000, step_size, acceleration = 1) {
#' @param max_it maximum number of (outer) iterations.
#' @param max_inner_it maximum number of (inner) iterations in each outer iteration.
#' @param eta_multiplier multiplier for the barrier parameter. In each iteration, the barrier must be more restrictive
#' (i.e., the multiplier must be > 1).
#' (i.e., the multiplier must be > 1).
#' @param eta_start_conservative conservative initial barrier parameter. This is used if the previous penalty is
#' undefined or too far away.
#' undefined or too far away.
#' @param eta_start_aggressive aggressive initial barrier parameter. This is used if the previous penalty is close.
#' @param lambda_relchange_aggressive how close must the lambda parameter from the previous penalty term be to use
#' an aggressive initial barrier parameter (i.e., what constitutes "too far").
#' an aggressive initial barrier parameter (i.e., what constitutes "too far").
#'
#' @return options for the DAL EN algorithm.
#' @family EN algorithms
......
......@@ -14,13 +14,11 @@ initest_options <- function (keep_solutions = 5, psc_method = c("exact", "rr"),
resid_keep_method <- match.arg(resid_keep_method)
list(keepSolutions = keep_solutions,
pscMethod = psc_method,
maxit = maxit,
keepResidualsMethod = resid_keep_method,
keepResidualsThreshold = resid_keep_thresh,
keepResidualsProportion = resid_keep_prop,
keepPSCProportion = psc_keep)
list(keepSolutions = .as(keep_solutions[[1L]], 'integer'), pscMethod = psc_method,
maxit = .as(maxit[[1L]], 'integer'), keepResidualsMethod = resid_keep_method,
keepResidualsThreshold = .as(resid_keep_thresh[[1L]], 'numeric'),
keepResidualsProportion = .as(resid_keep_prop[[1L]], 'numeric'),
keepPSCProportion = .as(psc_keep[[1L]], 'numeric'))
}
#' Deprecated Options for the EN Algorithms
......@@ -45,8 +43,9 @@ en_options_aug_lars <- function (use_gram = c("auto", "yes", "no"), eps = 1e-12)
en_options_dal <- function (maxit = 100, eps = 1e-8, eta_mult = 2, eta_start_numerator = 1e-2,
eta_start, preconditioner = c("approx", "none", "diagonal"), verbosity = 0) {
deprecate_warn('2.0.0', 'en_options_dal()', with = 'en_dal_options()')
en_dal_options(max_it = maxit, eta_multiplier = eta_mult, eta_start_conservative = eta_start_numerator,
eta_start_aggressive = eta_start_numerator)
en_dal_options(max_it = .as(maxit[[1L]], 'integer'), eta_multiplier = .as(eta_mult[[1L]], 'numeric'),
eta_start_conservative = .as(eta_start_numerator[[1L]], 'numeric'),
eta_start_aggressive = .as(eta_start_numerator[[1L]], 'numeric'))
}
......@@ -61,9 +60,9 @@ en_options_dal <- function (maxit = 100, eps = 1e-8, eta_mult = 2, eta_start_num
#' @importFrom lifecycle deprecate_warn
pense_options <- function (delta = 0.25, maxit = 1000, eps = 1e-6, mscale_eps = 1e-8, mscale_maxit = 200,
verbosity = 0, cc = NULL, en_correction = TRUE) {
deprecate_warn('2.0.0', 'pense_options()', with = 'mm_algorithm_options()')
list(maxit = maxit, bdp = delta, eps = eps, mscale_opts = mscale_algorithm_options(max_it = maxit, eps = mscale_eps),
cc = cc)
deprecate_warn('2.0.0', 'pense_options()')
list(maxit = .as(maxit[[1L]], 'integer'), bdp = .as(delta[[1L]], 'numeric'), eps = .as(eps[[1L]], 'numeric'),
mscale_opts = mscale_algorithm_options(max_it = maxit, eps = mscale_eps))
}
#' Deprecated Additional Options for the Penalized EN MM-estimator
......@@ -77,8 +76,8 @@ pense_options <- function (delta = 0.25, maxit = 1000, eps = 1e-6, mscale_eps =
#' @importFrom lifecycle deprecate_warn
mstep_options <- function (cc = 3.44, maxit = 1000, eps = 1e-6, adjust_bdp = FALSE, verbosity = 0,
en_correction = TRUE) {
deprecate_warn('2.0.0', 'mstep_options()', with = 'mm_algorithm_options()')
mm_algorithm_options(max_it = maxit)
deprecate_warn('2.0.0', 'mstep_options()')
list(maxit = .as(maxit[[1L]], 'integer'))
}
#' Deprecated: ENPY Initial Estimates for EN S-Estimators
......
......@@ -14,23 +14,23 @@
#'
#' @param x `n` by `p` matrix of numeric predictors.
#' @param y vector of response values of length `n`.
#' @param alpha value for the alpha parameter, the balance between L1 and L2
#' penalization.
#' @param alpha elastic net penalty mixing parameter with \eqn{0 \le \alpha \le 1}. `alpha = 1` is the LASSO penalty,
#' and `alpha = 0` the Ridge penalty.
#' @param nlambda number of penalization levels.
#' @param lambda_min_ratio Smallest value of the penalization level as a fraction of the largest level (i.e., the
#' smallest value for which all coefficients are zero). The default depends on the sample
#' size relative to the number of variables and `alpha`. If more observations than variables
#' are available, the default is `1e-3 * alpha`, otherwise `1e-2 * alpha`.
#' smallest value for which all coefficients are zero). The default depends on the sample
#' size relative to the number of variables and `alpha`. If more observations than variables
#' are available, the default is `1e-3 * alpha`, otherwise `1e-2 * alpha`.
#' @param lambda optional user-supplied sequence of penalization levels. If given and not `NULL`, `nlambda` and
#' `lambda_min_ratio` are ignored.
#' `lambda_min_ratio` are ignored.
#' @param penalty_loadings a vector of positive penalty loadings (a.k.a. weights) for different penalization of each
#' coefficient.
#' coefficient.
#' @param standardize standardize variables to have unit variance. Coefficients are always returned in original scale.
#' @param weights a vector of positive observation weights.
#' @param intercept include an intercept in the model.
#' @param sparse use sparse coefficient vectors.
#' @param en_algorithm_opts options for the EN algorithm. See [en_algorithm_options]
#' for details.
#' for details.
#' @param eps numerical tolerance.
#' @param xtest deprecated. Instead, extract coefficients with [coef.elnet_est()] and compute predictions manually.
#' @param options deprecated. Use `en_algorithm_opts` instead.
......@@ -112,10 +112,10 @@ elnet <- function(x, y, alpha, nlambda = 100, lambda_min_ratio, lambda, penalty_
#' @param cv_k number of folds per cross-validation.
#' @param cv_repl number of cross-validation replications.
#' @param cv_metric either a string specifying the performance metric to use, or a function to evaluate prediction
#' errors in a single CV replication. If a function, it is called with a single numeric vector of
#' prediction errors and must return a scalar number.
#' errors in a single CV replication. If a function, it is called with a single numeric vector of
#' prediction errors and must return a scalar number.
#' @param fit_all If `TRUE`, fit the EN estimate for all penalization levels. Otherwise, only at penalization
#' level with smallest average CV performance.
#' level with smallest average CV performance.
#' @param ... further arguments to [elnet()].
#'
#' @seealso [elnet()] for computing the LS-EN regularization path without cross-validation.
......
......@@ -9,15 +9,15 @@
#'
#' @param x `n` by `p` matrix of numeric predictors.
#' @param y vector of response values of length `n`.
#' @param alpha value for the alpha parameter, the balance between L1 and L2
#' penalization.
#' @param alpha elastic net penalty mixing parameter with \eqn{0 \le \alpha \le 1}. `alpha = 1` is the LASSO penalty,
#' and `alpha = 0` the Ridge penalty.
#' @param lambda a vector of positive values for the lambda parameter.
#' @param penalty_loadings a vector of positive penalty loadings
#' (a.k.a. weights) for different penalization of each coefficient.
#' @param penalty_loadings a vector of positive penalty loadings (a.k.a. weights) for different penalization of each
#' coefficient.
#' @param intercept include an intercept in the model.
#' @param bdp the desired breakdown point of the estimator, between 0 and 0.5.
#' @param cc consistency constant for the scale estimate. By default, the scale estimate is made consistent for the
#' given breakdown point under the Normal model.
#' given breakdown point under the Normal model.
#' @param sparse use sparse coefficient vectors.
#' @param eps numerical tolerance.
#' @param enpy_opts options for the EN-PY procedure. See [enpy_options] for details.
......@@ -26,8 +26,8 @@
#' @export
#'
#' @references Cohen Freue, G.V.; Kepplinger, D.; Salibián-Barrera, M.; Smucler, E.
#' Robust elastic net estimators for variable selection and identification of proteomic biomarkers.
#' *Ann. Appl. Stat.* **13** (2019), no. 4, 2065--2090 <https://doi.org/10.1214/19-AOAS1269>
#' Robust elastic net estimators for variable selection and identification of proteomic biomarkers.
#' *Ann. Appl. Stat.* **13** (2019), no. 4, 2065--2090 <https://doi.org/10.1214/19-AOAS1269>
#'
#' @importFrom lifecycle deprecate_warn deprecate_stop deprecated is_present
#' @importFrom rlang abort
......@@ -99,30 +99,30 @@ enpy_initial_estimates <- function (x, y, alpha, lambda, bdp = 0.25, cc, interce
#'
#' @param x `n` by `p` matrix of numeric predictors.
#' @param y vector of response values of length `n`.
#' @param alpha value for the alpha parameter, the balance between L1 and L2
#' penalization.
#' @param alpha elastic net penalty mixing parameter with \eqn{0 \le \alpha \le 1}. `alpha = 1` is the LASSO penalty,
#' and `alpha = 0` the Ridge penalty.
#' @param lambda a vector of positive values for the lambda parameter.
#' @param penalty_loadings a vector of positive penalty loadings
#' (a.k.a. weights) for different penalization of each coefficient.
#' @param penalty_loadings a vector of positive penalty loadings (a.k.a. weights) for different penalization of each
#' coefficient.
#' @param intercept include an intercept in the model.
#' @param sparse use sparse coefficient vectors.
#' @param eps numerical tolerance.
#' @param en_algorithm_opts options for the EN algorithm. See [en_algorithm_options] for details.
#' @param ncores number of CPU cores to use in parallel. By default, only one CPU core is used.
#' @param method defunct. PSCs are always computed for EN estimates. For the PY procedure for unpenalized estimation
#' use package [pyinit](https://cran.r-project.org/package=pyinit).
#' use package [pyinit](https://cran.r-project.org/package=pyinit).
#'
#' @return a list of principal sensitivity components, one per element in `lambda`. Each PSC is itself a list
#' with items `lambda`, `alpha`, and `pscs`.
#' with items `lambda`, `alpha`, and `pscs`.
#'
#' @export
#'
#' @references Cohen Freue, G.V.; Kepplinger, D.; Salibián-Barrera, M.; Smucler, E.
#' Robust elastic net estimators for variable selection and identification of proteomic biomarkers.
#' *Ann. Appl. Stat.* **13** (2019), no. 4, 2065--2090 <https://doi.org/10.1214/19-AOAS1269>
#' Robust elastic net estimators for variable selection and identification of proteomic biomarkers.
#' *Ann. Appl. Stat.* **13** (2019), no. 4, 2065--2090 <https://doi.org/10.1214/19-AOAS1269>
#' @references Pena, D., and Yohai, V.J.
#' A Fast Procedure for Outlier Diagnostics in Large Regression Problems.
#' *J. Amer. Statist. Assoc.* **94** (1999). no. 446, 434--445. <https://doi.org/10.2307/2670164>
#' A Fast Procedure for Outlier Diagnostics in Large Regression Problems.
#' *J. Amer. Statist. Assoc.* **94** (1999). no. 446, 434--445. <https://doi.org/10.2307/2670164>
#'
#' @importFrom lifecycle deprecate_warn deprecate_stop deprecated is_present
#' @importFrom rlang abort
......
This diff is collapsed.
This diff is collapsed.
......@@ -5,46 +5,47 @@
#'
#' @param x `n` by `p` matrix of numeric predictors.
#' @param y vector of response values of length `n`.
#' @param alpha value for the alpha parameter, the balance between L1 and L2
#' penalization.
#' @param alpha elastic net penalty mixing parameter with \eqn{0 \le \alpha \le 1}. `alpha = 1` is the LASSO penalty,
#' and `alpha = 0` the Ridge penalty.
#' @param nlambda number of penalization levels.
#' @param lambda_min_ratio Smallest value of the penalization level as a fraction of the largest level (i.e., the
#' smallest value for which all coefficients are zero). The default depends on the sample
#' size relative to the number of variables and `alpha`. If more observations than variables
#' are available, the default is `1e-3 * alpha`, otherwise `1e-2 * alpha`.
#' smallest value for which all coefficients are zero). The default depends on the sample
#' size relative to the number of variables and `alpha`. If more observations than variables
#' are available, the default is `1e-3 * alpha`, otherwise `1e-2 * alpha`.
#' @param penalty_loadings a vector of positive penalty loadings (a.k.a. weights) for different penalization of each
#' coefficient. Only allowed for `alpha` > 0.
#' coefficient. Only allowed for `alpha` > 0.
#' @param standardize logical flag to standardize the `x` variables prior to fitting the M-estimates.
#' Coefficients are always returned on the original scale. This can fail for variables with a large
#' proportion of a single value (e.g., zero-inflated data). In this case, either compute with
#' `standardize = FALSE` or standardize the data manually.
#' @param lambda optional user-supplied sequence of penalization levels. If given and not `NULL`, `nlambda` and
#' `lambda_min_ratio` are ignored.
#' `lambda_min_ratio` are ignored.
#' @param starting_points a list of starting points, created by [starting_point()]. The starting points are shared
#' among all penalization levels.
#' @param standardize logical flag to standardize the `x` variables prior to fitting the M-estimates.
#' Coefficients are always returned on the original scale. This can fail for variables with a large
#' proportion of a single value (e.g., zero-inflated data). In this case, either compute with
#' `standardize = FALSE` or standardize the data manually.
#' among all penalization levels.
#' @param intercept include an intercept in the model.
#' @param add_zero_based also consider the 0-based regularization path in addition to the given starting points.
#' @param cc cutoff constant for Tukey's bisquare \eqn{\rho} function.
#' @param eps numerical tolerance.
#' @param explore_solutions number of solutions to compute up to the desired precision `eps`.
#' @param explore_tol numerical tolerance for exploring possible solutions. Should be (much) looser than `eps` to
#' be useful.
#' be useful.
#' @param max_solutions only retain up to `max_solutions` unique solutions per penalization level.
#' @param comparison_tol numeric tolerance to determine if two solutions are equal. The comparison is first done
#' on the absolute difference in the value of the objective function at the solution
#' If this is less than `comparison_tol`, two solutions are deemed equal if the squared difference
#' of the intercepts is less than `comparison_tol` and the squared \eqn{L_2} norm of the
#' difference vector is less than `comparison_tol`.
#' on the absolute difference in the value of the objective function at the solution
#' If this is less than `comparison_tol`, two solutions are deemed equal if the squared difference
#' of the intercepts is less than `comparison_tol` and the squared \eqn{L_2} norm of the
#' difference vector is less than `comparison_tol`.
#' @param sparse use sparse coefficient vectors.
#' @param ncores number of CPU cores to use in parallel. By default, only one CPU core is used. May not be supported
#' on your platform, in which case a warning is given.
#' @param algorithm_opts options for the M-estimation algorithm. See [mstep_algorithm_options()] for details.
#' on your platform, in which case a warning is given.
#' @param algorithm_opts options for the MM algorithm to compute estimates. See [mm_algorithm_options()] for details.
#' @param mscale_bdp,mscale_opts options for the M-scale estimate used to standardize the predictors
#' (if `standardize = TRUE`).
#' (if `standardize = TRUE`).
#'
#' @return a list-like object with the following items
#' \describe{
#' \item{`lambda`}{the sequence of penalization levels.}
#' \item{`scale`}{the used scale of the residuals.}
#' \item{`estimates`}{a list of estimates. Each estimate contains the following information:
#' \describe{
#' \item{`intercept`}{intercept estimate.}
......@@ -84,7 +85,7 @@ regmest <- function(x, y, alpha, nlambda = 50, lambda, lambda_min_ratio, scale,
fit$estimates <- lapply(fit$estimates, function (ests) {
args$restore_coef_length(args$std_data$unstandardize_coefs(ests[[1L]]))
})
structure(list(estimates = .metrics_attrib(fit$estimates, fit$metrics), call = call,
structure(list(estimates = .metrics_attrib(fit$estimates, fit$metrics), call = call, scale = args$scale,
lambda = unlist(lapply(fit$estimates, `[[`, 'lambda'), use.names = FALSE, recursive = FALSE)),
class = c('mest', 'pense_fit'))
}
......@@ -103,19 +104,19 @@ regmest <- function(x, y, alpha, nlambda = 50, lambda, lambda_min_ratio, scale,
#' @param x `n` by `p` matrix of numeric predictors, as for [regmest()].
#' @param y vector of response values of length `n`, as for [regmest()].
#' @param standardize whether to standardize the `x` variables prior to fitting the PENSE estimates.
#' Can also be set to `"cv_only"`, in which case the input data is not standardized, but the
#' training data in the CV folds is scaled to match the scaling of the input data.
#' Coefficients are always returned on the original scale. This can fail for variables with a large
#' proportion of a single value (e.g., zero-inflated data). In this case, either compute with
#' `standardize = FALSE` or standardize the data manually.
#' Can also be set to `"cv_only"`, in which case the input data is not standardized, but the
#' training data in the CV folds is scaled to match the scaling of the input data.
#' Coefficients are always returned on the original scale. This can fail for variables with a large
#' proportion of a single value (e.g., zero-inflated data). In this case, either compute with
#' `standardize = FALSE` or standardize the data manually.
#' @param lambda optional user-supplied sequence of penalization levels. For details see [regmest()].
#' @param cv_k number of folds per cross-validation.
#' @param cv_repl number of cross-validation replications.
#' @param cv_metric either a string specifying the performance metric to use, or a function to evaluate prediction
#' errors in a single CV replication. If a function, it is called with a single numeric vector of
#' prediction errors and must return a scalar number.
#' errors in a single CV replication. If a function, it is called with a single numeric vector of
#' prediction errors and must return a scalar number.
#' @param fit_all If `TRUE`, fit the PENSE estimate for all penalization levels. Otherwise, only at penalization
#' level with smallest average CV performance.
#' level with smallest average CV performance.
#' @param ... further arguments to [regmest()].
#'
#' @seealso [regmest()] for computing regularized S-estimates without cross-validation.
......@@ -123,6 +124,7 @@ regmest <- function(x, y, alpha, nlambda = 50, lambda, lambda_min_ratio, scale,
#' @return a list with components:
#' \describe{
#' \item{`lambda`}{the sequence of penalization levels.}
#' \item{`scale`}{the used scale of the residuals.}
#' \item{`cvres`}{data frame of average cross-validated performance.}
#' \item{`cv_replications`}{matrix of cross-validated performance metrics, one column per replication.
#' Rows are in the same order as in `cvres`.}
......@@ -190,7 +192,8 @@ regmest_cv <- function(x, y, standardize = TRUE, lambda, cv_k, cv_repl = 1,
})
return(structure(list(call = call, cvres = cv_perf_df, cv_replications = cv_perf, cv_measure = cv_measure_str,
lambda = args$lambda, estimates = .metrics_attrib(fit$estimates, fit$metrics)),
scale = args$scale, lambda = args$lambda,
estimates = .metrics_attrib(fit$estimates, fit$metrics)),
class = c('mest', 'pense_cvfit')))
}
......
#' Predict Method for PENSE Fits
#'
#' Predict response values using a PENSE (or LS-EN) regularization path fitted by [pense()] or [elnet()].
#'
#' @param object PENSE regularization path to predict from.
#' @param newdata an optional matrix of new predictor values. If missing, the fitted values are computed.
#' @param lambda a single value of the penalty parameter.
#' @param exact defunct. Always gives a warning if `lambda` is not part of the fitted sequence and coefficients
#'    need to be interpolated.
#' @param correction defunct.
#' @param ... currently not used.
#' @return a numeric vector of predicted values for the given penalization level.
#'
#' @importFrom Matrix drop
#' @importFrom lifecycle deprecate_warn deprecate_stop deprecated is_present
#' @importFrom rlang abort
#'
#' @example examples/residuals.R
#' @export
predict.pense_fit <- function(object, newdata, lambda, exact = deprecated(), correction = deprecated(), ...) {
  if (is_present(exact)) {
    # `exact` is defunct: coefficients are always interpolated (with a warning) if
    # `lambda` is not part of the fitted sequence.
    deprecate_warn('2.0.0', 'predict(exact=)')
  }
  if (is_present(correction)) {
    deprecate_stop('2.0.0', 'predict(correction=)')
  }
  # Coefficients for the requested penalization level (interpolated if necessary).
  coef_est <- coef(object, lambda = lambda, concat = FALSE)
  if (missing(newdata)) {
    # No new data given: recover the training predictors from the original call
    # to compute fitted values.
    newdata <- eval.parent(object$call$x)
  } else {
    newdata <- data.matrix(newdata)
    x_dim <- dim(newdata)
    if (x_dim[[2L]] != length(coef_est$beta)) {
      abort(sprintf("`newdata` is of invalid size (contains %d predictors instead of the required %d)",
                    x_dim[[2L]], length(coef_est$beta)))
    }
  }
  return(drop(newdata %*% coef_est$beta) + coef_est$intercept)
}
#' Predict Method for PENSE Fits
#'
#' Predict response values using a PENSE (or LS-EN) regularization path with hyper-parameters chosen by
#' cross-validation.
#'
#' If `lambda = "se"` and `object` contains fitted estimates for every penalization level in the sequence, use the
#' most parsimonious model with prediction performance statistically indistinguishable from the best model.
#' This is determined to be the model with prediction performance within `se_mult * cv_se` from the best model.
#'
#' @param object PENSE fit with cross-validated hyper-parameters to predict from.
#' @param newdata an optional matrix of new predictor values. If missing, the fitted values are computed.
#' @param lambda either a string specifying which penalty level to use or a single numeric value of the penalty
#'    parameter. See details.
#' @param se_mult If `lambda = "se"`, the multiple of standard errors to tolerate.
#' @param exact deprecated. Always gives a warning if `lambda` is not part of the fitted sequence and coefficients
#'    are interpolated.
#' @param correction defunct.
#' @param ... currently not used.
#' @return a numeric vector of predicted values for the given penalization level.
#'
#' @importFrom Matrix drop
#' @importFrom lifecycle deprecate_warn deprecate_stop deprecated is_present
#' @importFrom rlang abort
#'
#' @example examples/residuals.R
#' @export
predict.pense_cvfit <- function(object, newdata, lambda = c('min', 'se'), se_mult = 2, exact = deprecated(),
                                correction = deprecated(), ...) {
  if (is_present(exact)) {
    # `exact` is deprecated: coefficients are always interpolated (with a warning) if
    # `lambda` is not part of the fitted sequence.
    deprecate_warn('2.0.0', 'predict(exact=)')
  }
  if (is_present(correction)) {
    deprecate_stop('2.0.0', 'predict(correction=)')
  }
  if (is.character(lambda)) {
    # Restrict string arguments to the supported choices ('min' or 'se').
    lambda <- match.arg(lambda)
  }
  # Coefficients at the CV-selected (or user-supplied) penalization level.
  coef_est <- coef(object, lambda = lambda, se_mult = se_mult, concat = FALSE)
  if (missing(newdata)) {
    # No new data given: recover the training predictors from the original call
    # to compute fitted values.
    newdata <- eval.parent(object$call$x)
  } else {
    newdata <- data.matrix(newdata)
    x_dim <- dim(newdata)
    if (x_dim[[2L]] != length(coef_est$beta)) {
      abort(sprintf("`newdata` is of invalid size (contains %d predictors instead of the required %d)",
                    x_dim[[2L]], length(coef_est$beta)))
    }
  }
  return(drop(newdata %*% coef_est$beta) + coef_est$intercept)
}
#' Extract Residuals
#'
#' Extract residuals from a PENSE (or LS-EN) regularization path fitted by [pense()] or [elnet()].
#'
#' @param object PENSE regularization path to extract residuals from.
#' @param lambda a single value of the penalty parameter.
#' @param exact defunct. Always gives a warning if `lambda` is not part of the fitted sequence and coefficients
#'    need to be interpolated.
#' @param correction defunct.
#' @param ... currently not used.
#' @return a numeric vector of residuals for the given penalization level.
#'
#' @example examples/residuals.R
#' @export
residuals.pense_fit <- function(object, lambda, exact = deprecated(), correction = deprecated(), ...) {
  # Recover the training response from the original call to `pense()`/`elnet()`.
  train_y <- eval.parent(object$call$y)
  # Rewrite this call to dispatch to `predict.pense_fit()`, forwarding all
  # arguments (including deprecation handling for `exact`/`correction`) unchanged.
  cl <- match.call(expand.dots = FALSE)
  cl[[1L]] <- quote(pense:::predict.pense_fit)
  # Residuals = observed response minus fitted values.
  return(train_y - eval.parent(cl))
}
#' Extract Residuals
#'
#' Extract residuals from a PENSE (or LS-EN) regularization path with hyper-parameters chosen by cross-validation.
#'
#' If `lambda = "se"` and `object` contains fitted estimates for every penalization level in the sequence, extract the
#' residuals of the most parsimonious model with prediction performance statistically indistinguishable from the best
#' model. This is determined to be the model with prediction performance within `se_mult * cv_se` from the best model.
#'
#' @param object PENSE fit with cross-validated hyper-parameters to extract residuals from.
#' @param lambda either a string specifying which penalty level to use or a single numeric value of the penalty
#'    parameter. See details.
#' @param se_mult If `lambda = "se"`, the multiple of standard errors to tolerate.
#' @param exact deprecated. Always gives a warning if `lambda` is not part of the fitted sequence and coefficients
#'    are interpolated.
#' @param correction defunct.
#' @param ... currently not used.
#' @return a numeric vector of residuals for the given penalization level.
#'
#' @example examples/residuals.R
#' @export
residuals.pense_cvfit <- function(object, lambda = c('min', 'se'), se_mult = 2, exact = deprecated(),
                                  correction = deprecated(), ...) {
  # Recover the training response from the original call.
  train_y <- eval.parent(object$call$y)
  # Rewrite this call to dispatch to `predict.pense_cvfit()`, forwarding all
  # arguments (including `lambda`/`se_mult` selection) unchanged.
  cl <- match.call(expand.dots = FALSE)
  cl[[1L]] <- quote(pense:::predict.pense_cvfit)
  # Residuals = observed response minus fitted values.
  return(train_y - eval.parent(cl))
}
......@@ -23,9 +23,9 @@ tau_size <- function (x) {
#' @param x numeric values. Missing values are verbosely ignored.
#' @param bdp desired breakdown point (between 0 and 0.5).
#' @param cc cutoff value for the bisquare rho function. By default, chosen to yield a consistent estimate for the
#' Normal distribution.
#' Normal distribution.
#' @param opts a list of options for the M-scale estimation algorithm, see [mscale_algorithm_options()]
#' for details.
#' for details.
#' @param delta deprecated. Use `bdp` instead.
#' @param rho,eps,maxit deprecated. Instead set control options for the algorithm with the `opts` arguments.
#' @return the M-estimate of scale.
......@@ -74,9 +74,9 @@ mscale <- function (x, bdp = 0.25, cc = consistency_const(bdp, 'bisquare'),
#' @param scale scale of the `x` values. If omitted, uses the [mad()][base::mad()].
#' @param rho the \eqn{\rho} function to use. See [rho_function()] for available functions.
#' @param cc value of the tuning constant for the chosen \eqn{\rho} function.
#' By default, chosen to achieve 95% efficiency under the Normal distribution.
#' By default, chosen to achieve 95% efficiency under the Normal distribution.
#' @param opts a list of options for the M-estimating algorithm, see
#' [mscale_algorithm_options()] for details.
#' [mscale_algorithm_options()] for details.
#' @return a single numeric value, the M-estimate of location.
#' @importFrom stats mad
#' @export
......@@ -107,11 +107,11 @@ mloc <- function (x, scale, rho, cc, opts = mscale_algorithm_options()) {
#' @param x numeric values. Missing values are verbosely ignored.
#' @param bdp desired breakdown point (between 0 and 0.5).
#' @param scale_cc cutoff value for the bisquare \eqn{\rho} function for computing the scale estimate. By default, chosen
#' to yield a consistent estimate for normally distributed values.
#' to yield a consistent estimate for normally distributed values.
#' @param location_rho,location_cc \eqn{\rho} function and cutoff value for computing the location estimate.
#' See [rho_function()] for a list of available \eqn{\rho} functions.
#' See [rho_function()] for a list of available \eqn{\rho} functions.
#' @param opts a list of options for the M-estimating equation,
#' see [mscale_algorithm_options()] for details.
#' see [mscale_algorithm_options()] for details.
#' @return a vector with 2 elements, the M-estimate of location and the M-scale estimate.
#' @export
#' @importFrom rlang warn
......@@ -150,7 +150,7 @@ consistency_const <- function (delta, rho) {
#'
#' @param rho the name of the \eqn{\rho} function to check for existence.
#' @return if `rho` is missing returns a vector of supported \eqn{\rho} function names, otherwise