boostrq

Package for boosting regression quantiles (Bauer, Haupt, & Linner, 2023). The functionality as well as the implementation was heavily inspired by the great package for model-based boosting mboost. Until now, only the linear baselearner (brq) is implemented, the nonlinear baselearner (brqss) will follow soon.

Installation

You can install the development version of boostrq from GitHub with:

# install.packages("devtools")
devtools::install_github("stefanlinner/boostrq")

Example

The following demonstrates the basic functionality of the package with a simple example.

### Attaching the package to the search path
library(boostrq)
#> Lade nötiges Paket: mboost
#> Lade nötiges Paket: parallel
#> Lade nötiges Paket: stabs

### Fitting your first boosting regression quantiles model.
boosted.rq <- 
  boostrq(
    formula = mpg ~ brq(cyl * hp) + brq(am + wt), # all common formula operators (*,:,^, etc.) can be used in the function brq()
    data = mtcars, 
    mstop = 200, 
    nu = 0.1, 
    tau = 0.5
  )

### Get some basic information about your model 
boosted.rq$formula # the model formula
#> mpg ~ brq(cyl * hp) + brq(am + wt)
#> <environment: 0x000002d865949d38>

boosted.rq$nu # the learning rate
#> [1] 0.1

boosted.rq$offset # the initialization of the algorithm (default: median of response)
#>  50% 
#> 19.2

boosted.rq$baselearner.names # names of the baselearners
#> [1] "brq(cyl * hp)" "brq(am + wt)"

boosted.rq$call # the model call
#> boostrq(formula = mpg ~ brq(cyl * hp) + brq(am + wt), data = mtcars, 
#>     mstop = 200, nu = 0.1, tau = 0.5)

boosted.rq$mstop() # current number of iterations
#> [1] 200

# or use
mstop(boosted.rq)
#> [1] 200

### Print your fitted model to get a collection of that basic information
boosted.rq
#> 
#>   Boosting Regression Qauntiles
#> 
#> Call:  boostrq(formula = mpg ~ brq(cyl * hp) + brq(am + wt), data = mtcars,      mstop = 200, nu = 0.1, tau = 0.5)
#> formula: mpg ~ brq(cyl * hp) + brq(am + wt)
#> 
#> 
#>   Quantile Regression
#> Loss Function: tau * (y - f) * ((y - f) > 0) +
#>         (tau - 1) * (y - f) * ((y - f) <= 0)
#> Negative Gradient: tau * ((y - f) > 0) + (tau - 1) * ((y - f) <= 0)
#> 
#> Number of boosting iterations: mstop = 200 
#> Step size: = 0.1 
#> Offset:  19.2 
#> Number of baselearners:  2

### Selection frequency of each component
boosted.rq$selection.freqs()
#>     Intercept brq(cyl * hp)  brq(am + wt) 
#>         0.025         0.645         0.330

### Estimated coefficients for current number of iterations
boosted.rq$coef(aggregate = "sum") # also try aggregate = "cumsum" or "none"
#> $`brq(cyl * hp)`
#> (Intercept)         cyl          hp      cyl:hp 
#> 19.33792010 -2.47917356 -0.09859763  0.01061430 
#> 
#> $`brq(am + wt)`
#> (Intercept)          am          wt 
#>    3.133808    1.861487   -1.167737 
#> 
#> $offset
#>  50% 
#> 19.2

# or use
coef(boosted.rq, aggregate = "sum")
#> $`brq(cyl * hp)`
#> (Intercept)         cyl          hp      cyl:hp 
#> 19.33792010 -2.47917356 -0.09859763  0.01061430 
#> 
#> $`brq(am + wt)`
#> (Intercept)          am          wt 
#>    3.133808    1.861487   -1.167737 
#> 
#> $offset
#>  50% 
#> 19.2

### Printing result summaries 
summary(boosted.rq)
#> 
#>   Boosting Regression Qauntiles
#> 
#> Call:  boostrq(formula = mpg ~ brq(cyl * hp) + brq(am + wt), data = mtcars,      mstop = 200, nu = 0.1, tau = 0.5)
#> formula: mpg ~ brq(cyl * hp) + brq(am + wt)
#> 
#> 
#>   Quantile Regression
#> Loss Function: tau * (y - f) * ((y - f) > 0) +
#>         (tau - 1) * (y - f) * ((y - f) <= 0)
#> Negative Gradient: tau * ((y - f) > 0) + (tau - 1) * ((y - f) <= 0)
#> 
#> Number of boosting iterations: mstop = 200 
#> Step size: = 0.1 
#> Offset:  19.2 
#> Number of baselearners:  2 
#> 
#> Estimated coefficients:
#> $`brq(cyl * hp)`
#> (Intercept)         cyl          hp      cyl:hp 
#> 19.33792010 -2.47917356 -0.09859763  0.01061430 
#> 
#> $`brq(am + wt)`
#> (Intercept)          am          wt 
#>    3.133808    1.861487   -1.167737 
#> 
#> $offset
#>  50% 
#> 19.2 
#> 
#> 
#> Selection frequencies:
#>     Intercept brq(cyl * hp)  brq(am + wt) 
#>         0.025         0.645         0.330
### Have a look at the underlying baselearner model matrices
boosted.rq$baselearner.matrix()

### Selected component for each iteration
boosted.rq$xselect()
c("Intercept", boosted.rq$baselearner.names)[boosted.rq$xselect() + 1]

### Current working residuals (negative gradients)
boosted.rq$neg.gradients()

### Course of empirical risk during the boosting process
boosted.rq$risk()

### Current fitted values
boosted.rq$fitted()
# or use
fitted(boosted.rq)

### Current residuals
boosted.rq$resid()
# or use
residuals(boosted.rq)

### Model predictions
dat.pred <- data.frame(
  cyl = c(4, 6),
  hp = c(90, 134),
  wt = c(3.125, 2.485), 
  am = c(1, 0)
)
boosted.rq$predict(newdata = dat.pred, aggregate = "sum") # also try aggregate = "cumsum" or "none"
# or use
predict(boosted.rq, newdata = dat.pred, aggregate = "sum")

### Update the number of iterations without to fully refit the model
### If mstop_new > mstop_current: The fitting process will start at the current number of iterations
### If mstop_new < mstop_current: The model result are subsetted, thus, the model is not refitted

### current number of iterations
boosted.rq$mstop()
#> [1] 200

### Update number of iterations
boosted.rq <- boosted.rq[300]
boosted.rq$mstop()
#> [1] 300

# or use
boosted.rq$subset(400)
boosted.rq$mstop()
#> [1] 400

# or use
mstop(boosted.rq) <- 100
boosted.rq$mstop()
#> [1] 100