... ... @@ -35,3 +35,4 @@ Suggests: License: MIT + file LICENSE NeedsCompilation: yes RoxygenNote: 7.1.0 Roxygen: list(markdown = TRUE, load = "source")
 # Generated by roxygen2: do not edit by hand S3method(coef,post_lasso) S3method(coef,thrive) export(post_lasso) export(post_lasso_cv) importClassesFrom(Matrix,dsparseVector) importFrom(Matrix,drop) export(thrive) importFrom(Rcpp,evalCpp) useDynLib(thrive, .registration = TRUE)
 #' Extract Model Coefficients #' #' @param object a post-LASSO estimate to extract coefficients from. #' @param s the number of variables in the model (for \code{what="plasso"}) or the value of the #' penalty parameter (for \code{what="lasso"}). #' @param sparse return a sparse vector or a dense (base R) numeric vector #' @param s the number of variables in the model (for what = "plasso") or the value of the #' penalty parameter (for what = "lasso"). #' @param sparse return sparse vector or dense (base R) vectors. #' @param ... currently not used. #' @return if \code{sparse = FALSE} a numeric vector of size \eqn{p + 1}. #' Otherwise a sparse vector. #' #' @importFrom Matrix drop #' @importClassesFrom Matrix dsparseVector #' @return a list of coefficients. Each coefficient is a list with two components: intercept and beta. #' If sparse = FALSE, beta is a numeric vector of size \eqn{p}, otherwise it is a sparse vector. #' #' @export coef.post_lasso <- function(object, s, what = c("plasso", "lasso"), sparse = TRUE, ...) { coef.post_lasso <- function (object, s, what = c("plasso", "lasso"), sparse = TRUE, ...) { what <- match.arg(what) res <- switch(what, plasso = .plasso_coefs(object, s), lasso = .lasso_coefs(object, s)) ... ... @@ -24,7 +21,31 @@ coef.post_lasso <- function(object, s, what = c("plasso", "lasso"), sparse = TRU return(res) } .plasso_coefs <- function(object, nkeep) { #' Extract Model Coefficients #' #' @param object a thrive estimate to extract coefficients from. #' @param nnz the number of exposures with non-zero coefficient. #' @param sparse return a sparse vector or a dense (base R) numeric vector #' @return a list of coefficients. Each coefficient is a list with two components: intercept and beta. #' If sparse = FALSE, beta is a numeric vector of size \eqn{p}, otherwise it is a sparse vector. #' #' @export coef.thrive <- function (object, nnz, sparse = TRUE, ...) { nvars <- sapply(object$estimates, function (x) length(x$beta@i) ) lapply(nnz, function (nk) { ret_ind <- which(nk == nvars) if (length(ret_ind) == 0L) { warning(sprintf("requested number of variables (%d) not available", nk)) return(list()) } if (isFALSE(sparse)) { object$estimates[[ret_ind]]$beta <- as.numeric(object$estimates[[ret_ind]]$beta) } return(object$estimates[[ret_ind]]) }) } .plasso_coefs <- function (object, nkeep) { nvars <- sapply(object$plasso, function (x) length(x$beta@i) ) unlist(lapply(nkeep, function (nk) { ret_ind <- match(nk, nvars) ... ... @@ -36,7 +57,6 @@ coef.post_lasso <- function(object, s, what = c("plasso", "lasso"), sparse = TRU }), recursive = FALSE, use.names = FALSE) } .lasso_coefs <- function(object, lambda) { lambda_seq <- unlist(lapply(object$lasso, [[, 'lambda')) if (length(lambda_seq) == 0L) { ... ...
 ... ... @@ -118,7 +118,7 @@ post_lasso_cv <- function (x, y, post_keep, cv_k, nlambda = 0, lambda, lambda_mi length(coef$beta@i) }) plasso_prederr <- lapply(train_fit$plasso, function (coef) { y_test - coef$intercept - drop(x_test %*% coef$beta) y_test - coef$intercept - as.numeric(x_test %*% coef$beta) }) if (any(duplicated(plasso_nact))) { ... ... @@ -131,7 +131,6 @@ post_lasso_cv <- function (x, y, post_keep, cv_k, nlambda = 0, lambda, lambda_mi } else { plasso_prederr <- matrix(unlist(plasso_prederr), nrow = length(y_test)) } plasso_prederr[ , match(nact_full_present, plasso_nact)] } else { numeric(0L) ... ...
 #' Compute Thrice-Robust Instrumental Variable Estimates #' #' Post-LASSO estimates regressing each exposure on the instruments are selected via cross-validation. #' Compute thrice-robust instrumental variable estimates. #' The exposures x are first de-confounded using instruments z, and the response y is then regressed on #' the de-confounded exposures. #' Estimates for de-confounding the exposures are computed via post-LASSO and the reguarlization parameter #' is selected via cross-validation. #' #' #' @param x matrix of exposures. Should be properly standardized beforehand. #' @param y vector of response values. ... ... @@ -18,6 +23,14 @@ #' @param cv_metric either a string specifying the performance metric to use, or a function to evaluate prediction #' errors in a single CV replication. If a function, it is called with a single numeric vector of #' prediction errors and must return a scalar number. #' @return #' \item{plasso}{list of 2nd-stage PLASSO regression coefficients.} #' \item{x_fitted}{the fitted exposures.} #' \item{cv_perf_plasso}{cross-validated prediction performance of the estimates based on x_fitted.} #' \item{cv_perf_grid}{cross-validated prediction performance of the estimates based on all the considered fitted #' exposures.} #' \item{z_coefs}{coefficients for regressing each exposure on the instruments.} #' @export thrive <- function (x, y, z, cv_k, model_size_span = 1, model_sizes_x, model_size_y, intercept = TRUE, cv_repl = 1, cv_metric = c('rmspe', 'mape')) { y <- drop(y) ... ... @@ -51,26 +64,26 @@ thrive <- function (x, y, z, cv_k, model_size_span = 1, model_sizes_x, model_siz } model_size_y <- if (missing(model_size_y) || is.null(model_size_y)) { seq_len(x_dim[[2L]]) seq_len(x_dim[[2L]] + 1L) - 1L } else if (is.numeric(model_size_y)) { sort(as.integer(model_size_y)) } else { stop("model_size_y must be an integer vector.") } model_size_x <- if (missing(model_size_x) || is.null(model_size_x)) { model_sizes_x <- if (missing(model_sizes_x) || is.null(model_sizes_x)) { rep.int(list(seq_len(z_dim[[2L]] + 1L) - 1L), x_dim[[2L]]) } else if (is.list(model_size_x)) { if (length(model_size_x) != x_dim[[2L]]) { stop("If model_size_x is a list, it must contain one integer vector per column in x.") } else if (is.list(model_sizes_x)) { if (length(model_sizes_x) != x_dim[[2L]]) { stop("If model_sizes_x is a list, it must contain one integer vector per column in x.") } lapply(model_size_x, function (msx) { lapply(model_sizes_x, function (msx) { if (!is.numeric(msx)) { stop("If model_size_x is a list, it must contain one integer vector per column in x.") stop("If model_sizes_x is a list, it must contain one integer vector per column in x.") } sort(as.integer(msx)) }) } else if (is.numeric(model_size_x)) { } else if (is.numeric(model_sizes_x)) { rep.int(list(sort(as.integer(model_size_y))), x_dim[[2L]]) } else { stop("model_size_y must be an integer vector or a list of integer vectors.") ... ... @@ -79,7 +92,7 @@ thrive <- function (x, y, z, cv_k, model_size_span = 1, model_sizes_x, model_siz # First stage, regress each exposure on all instruments stage_1 <- lapply(seq_len(x_dim[[2L]]), function (j) { pr <- post_lasso_cv(x = z, y = x[, j, drop = TRUE], post_keep = model_size_x[[j]], lambda = numeric(0L), pr <- post_lasso_cv(x = z, y = x[, j, drop = TRUE], post_keep = model_sizes_x[[j]], lambda = numeric(0L), intercept = intercept, cv_k = cv_k, cv_repl = cv_repl, cv_metric = cv_metric) best_model <- which.min(pr$cv_perf_plasso$cvm) ... ... @@ -88,7 +101,7 @@ thrive <- function (x, y, z, cv_k, model_size_span = 1, model_sizes_x, model_siz coefs <- pr$plasso[model_grid] fitted <- lapply(coefs, function (coef) { drop(z %*% coef$beta) + coef$intercept as.numeric(z %*% coef$beta) + coef$intercept }) list(coefs = coefs, fitted = fitted) }) ... ... @@ -98,6 +111,7 @@ thrive <- function (x, y, z, cv_k, model_size_span = 1, model_sizes_x, model_siz pr <- post_lasso_cv(x = x_fitted, y = y, post_keep = model_size_y, lambda = numeric(0L), intercept = intercept, cv_k = cv_k, cv_repl = cv_repl, cv_metric = cv_metric) pr$x_fitted <- x_fitted pr$cv_perf_plasso$grid_step <- k - model_size_span - 1L pr$z_coefs <- lapply(stage_1, function (s1) { s1$coefs[[k]] }) return(pr) }) ... ... @@ -108,7 +122,12 @@ thrive <- function (x, y, z, cv_k, model_size_span = 1, model_sizes_x, model_siz best_stage_2 <- stage_2[[which.min(stage_2_min_cv)]] best_stage_2$cv_perf_lasso <- NULL best_stage_2$lasso <- NULL best_stage_2\$cv_perf_grid <- do.call(rbind, lapply(stage_2, [[, 'cv_perf_plasso')) class(best_stage_2) <- 'thrive' names(best_stage_2)[[which(names(best_stage_2) == 'plasso')]] <- 'estimates' return(best_stage_2) } ... ...
 ... ... @@ -9,16 +9,16 @@ \arguments{ \item{object}{a post-LASSO estimate to extract coefficients from.} \item{s}{the number of variables in the model (for \code{what="plasso"}) or the value of the penalty parameter (for \code{what="lasso"}).} \item{s}{the number of variables in the model (for \code{what = "plasso"}) or the value of the penalty parameter (for \code{what = "lasso"}).} \item{sparse}{return a sparse vector or a dense (base R) numeric vector} \item{sparse}{return sparse vector or dense (base R) vectors.} \item{...}{currently not used.} } \value{ if \code{sparse = FALSE} a numeric vector of size \eqn{p + 1}. Otherwise a sparse vector. a list of coefficients. Each coefficient is a list with two components: \code{intercept} and \code{beta}. If \code{sparse = FALSE}, \code{beta} is a numeric vector of size \eqn{p}, otherwise it is a sparse vector. } \description{ Extract Model Coefficients ... ...
 % Generated by roxygen2: do not edit by hand % Please edit documentation in R/coef-methods.R \name{coef.thrive} \alias{coef.thrive} \title{Extract Model Coefficients} \usage{ \method{coef}{thrive}(object, nnz, sparse = TRUE, ...) } \arguments{ \item{object}{a thrive estimate to extract coefficients from.} \item{nnz}{the number of exposures with non-zero coefficient.} \item{sparse}{return a sparse vector or a dense (base R) numeric vector} } \value{ a list of coefficients. Each coefficient is a list with two components: \code{intercept} and \code{beta}. If \code{sparse = FALSE}, \code{beta} is a numeric vector of size \eqn{p}, otherwise it is a sparse vector. } \description{ Extract Model Coefficients }
 ... ... @@ -34,18 +34,18 @@ post_lasso_cv( \item{y}{response vector.} \item{post_keep}{sizes of models for which post-LASSO estimates are to be computed. If missing, post-LASSO estimates are computed for all possible models, i.e., models of size 1 ... ncol(x). If NULL, no post-LASSO estimates are computed.} are computed for all possible models, i.e., models of size 1 ... \code{ncol(x)}. If \code{NULL}, no post-LASSO estimates are computed.} \item{nlambda}{number of penalization levels for which LASSO estimates are computed.} \item{lambda}{optional user-supplied sequence of penalization levels. If given and not NULL, nlambda and lambda_min_ratio are ignored.} \item{lambda}{optional user-supplied sequence of penalization levels. If given and not \code{NULL}, \code{nlambda} and \code{lambda_min_ratio} are ignored.} \item{lambda_min_ratio}{Smallest value of the penalization level as a fraction of the largest level (i.e., the smallest value for which all coefficients are zero). The default depends on the sample size relative to the number of variables. If more observations than variables are available, the default is 1e-3, otherwise 1e-2.} are available, the default is \code{1e-3}, otherwise \code{1e-2}.} \item{intercept}{should an intercept be estimated?} ... ... @@ -59,15 +59,15 @@ prediction errors and must return a scalar number.} } \value{ \item{lambda}{vector of lambda values.} \item{lasso}{list of LASSO regression coefficients.} \item{plasso}{list of PLASSO regression coefficients.} \item{lasso}{list of LASSO regression coefficients.} \item{plasso}{list of PLASSO regression coefficients.} } \description{ Estimate the LASSO and Post-LASSO (PLASSO) regression coefficients. Post-LASSO estimates are computed for all models of size in post_keep, whereas LASSO estimates are computed for all penalization levels in lambda. Post-LASSO estimates are computed for all models of size in \code{post_keep}, whereas LASSO estimates are computed for all penalization levels in \code{lambda}. [post_lasso_cv()] automatically cross-validates (post)-LASSO estimates. \code{\link[=post_lasso_cv]{post_lasso_cv()}} automatically cross-validates (post)-LASSO estimates. } \details{ Post-LASSO estimates are ordinary least square estimates of the coefficients for variables selected by LASSO. ... ...
 ... ... @@ -29,12 +29,12 @@ thrive( \item{model_size_span}{span of the grid of model sizes to consider in the second stage. See details.} \item{model_sizes_x}{model sizes to consider for the exposures. If missing, consider all possible model sizes for each exposure (i.e., models of size 1 ... ncol(z)). If an integer vector, the same model sizes are considered for every exposure. If a list the same length as x for each exposure (i.e., models of size 1 ... \code{ncol(z)}). If an integer vector, the same model sizes are considered for every exposure. If a list the same length as \code{x} columns, different model sizes are considered for each exposure.} \item{model_size_y}{model sizes to consider for the second stage regression, i.e., for regressing the response y on the de-confounded matrix of exposures.} response \code{y} on the de-confounded matrix of exposures.} \item{intercept}{should an intercept be estimated?} ... ... @@ -44,6 +44,18 @@ response y on the de-confounded matrix of exposures.} errors in a single CV replication. If a function, it is called with a single numeric vector of prediction errors and must return a scalar number.} } \value{ \item{plasso}{list of 2nd-stage PLASSO regression coefficients.} \item{x_fitted}{the fitted exposures.} \item{cv_perf_plasso}{cross-validated prediction performance of the estimates based on \code{x_fitted}.} \item{cv_perf_grid}{cross-validated prediction performance of the estimates based on all the considered fitted exposures.} \item{z_coefs}{coefficients for regressing each exposure on the instruments.} } \description{ Post-LASSO estimates regressing each exposure on the instruments are selected via cross-validation. Compute thrice-robust instrumental variable estimates. The exposures \code{x} are first de-confounded using instruments \code{z}, and the response \code{y} is then regressed on the de-confounded exposures. Estimates for de-confounding the exposures are computed via post-LASSO and the reguarlization parameter is selected via cross-validation. }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!