Package for boosting regression quantiles (Bauer, Haupt, & Linner, 2023). Both the functionality and the implementation were heavily inspired by the excellent model-based boosting package mboost. Currently, only the linear baselearner (brq) is implemented; the nonlinear baselearner (brqss) will follow soon.
You can install the development version of boostrq from GitHub with:
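For example, using devtools (the repository path below is a placeholder, not the confirmed location of the package):

# install.packages("devtools")
devtools::install_github("<github-user>/boostrq") # <github-user>: placeholder for the repository owner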
The following demonstrates the basic functionality of the package with a simple example.
### Attaching the package to the search path
library(boostrq)
#> Loading required package: mboost
#> Loading required package: parallel
#> Loading required package: stabs
### Fitting your first boosting regression quantiles model.
boosted.rq <-
  boostrq(
    formula = mpg ~ brq(cyl * hp) + brq(am + wt), # all common formula operators (*, :, ^, etc.) can be used in the function brq()
    data = mtcars,
    mstop = 200,
    nu = 0.1,
    tau = 0.5
  )
### Get some basic information about your model
boosted.rq$formula # the model formula
#> mpg ~ brq(cyl * hp) + brq(am + wt)
#> <environment: 0x000002d865949d38>
boosted.rq$nu # the learning rate
#> [1] 0.1
boosted.rq$offset # the initialization of the algorithm (default: median of response)
#> 50%
#> 19.2
boosted.rq$baselearner.names # names of the baselearners
#> [1] "brq(cyl * hp)" "brq(am + wt)"
boosted.rq$call # the model call
#> boostrq(formula = mpg ~ brq(cyl * hp) + brq(am + wt), data = mtcars,
#> mstop = 200, nu = 0.1, tau = 0.5)
boosted.rq$mstop() # current number of iterations
#> [1] 200
# or use
mstop(boosted.rq)
#> [1] 200
### Print your fitted model to display this basic information at once
boosted.rq
#>
#> Boosting Regression Quantiles
#>
#> Call: boostrq(formula = mpg ~ brq(cyl * hp) + brq(am + wt), data = mtcars, mstop = 200, nu = 0.1, tau = 0.5)
#> formula: mpg ~ brq(cyl * hp) + brq(am + wt)
#>
#>
#> Quantile Regression
#> Loss Function: tau * (y - f) * ((y - f) > 0) +
#> (tau - 1) * (y - f) * ((y - f) <= 0)
#> Negative Gradient: tau * ((y - f) > 0) + (tau - 1) * ((y - f) <= 0)
#>
#> Number of boosting iterations: mstop = 200
#> Step size: = 0.1
#> Offset: 19.2
#> Number of baselearners: 2
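For reference, in standard notation the printed loss is the check (pinball) loss of quantile regression, and the printed negative gradient is its pointwise (sub)gradient with respect to f:

$$
\rho_\tau(y, f) = \tau\,(y - f)\,\mathbf{1}\{y - f > 0\} + (\tau - 1)\,(y - f)\,\mathbf{1}\{y - f \le 0\}
$$

$$
-\frac{\partial}{\partial f}\,\rho_\tau(y, f) = \tau\,\mathbf{1}\{y - f > 0\} + (\tau - 1)\,\mathbf{1}\{y - f \le 0\}
$$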
### Selection frequency of each component
boosted.rq$selection.freqs()
#> Intercept brq(cyl * hp) brq(am + wt)
#> 0.025 0.645 0.330
### Estimated coefficients for the current number of iterations
boosted.rq$coef(aggregate = "sum") # also try aggregate = "cumsum" or "none"
#> $`brq(cyl * hp)`
#> (Intercept) cyl hp cyl:hp
#> 19.33792010 -2.47917356 -0.09859763 0.01061430
#>
#> $`brq(am + wt)`
#> (Intercept) am wt
#> 3.133808 1.861487 -1.167737
#>
#> $offset
#> 50%
#> 19.2
# or use
coef(boosted.rq, aggregate = "sum")
#> $`brq(cyl * hp)`
#> (Intercept) cyl hp cyl:hp
#> 19.33792010 -2.47917356 -0.09859763 0.01061430
#>
#> $`brq(am + wt)`
#> (Intercept) am wt
#> 3.133808 1.861487 -1.167737
#>
#> $offset
#> 50%
#> 19.2
### Printing result summaries
summary(boosted.rq)
#>
#> Boosting Regression Quantiles
#>
#> Call: boostrq(formula = mpg ~ brq(cyl * hp) + brq(am + wt), data = mtcars, mstop = 200, nu = 0.1, tau = 0.5)
#> formula: mpg ~ brq(cyl * hp) + brq(am + wt)
#>
#>
#> Quantile Regression
#> Loss Function: tau * (y - f) * ((y - f) > 0) +
#> (tau - 1) * (y - f) * ((y - f) <= 0)
#> Negative Gradient: tau * ((y - f) > 0) + (tau - 1) * ((y - f) <= 0)
#>
#> Number of boosting iterations: mstop = 200
#> Step size: = 0.1
#> Offset: 19.2
#> Number of baselearners: 2
#>
#> Estimated coefficients:
#> $`brq(cyl * hp)`
#> (Intercept) cyl hp cyl:hp
#> 19.33792010 -2.47917356 -0.09859763 0.01061430
#>
#> $`brq(am + wt)`
#> (Intercept) am wt
#> 3.133808 1.861487 -1.167737
#>
#> $offset
#> 50%
#> 19.2
#>
#>
#> Selection frequencies:
#> Intercept brq(cyl * hp) brq(am + wt)
#> 0.025 0.645 0.330
### Have a look at the underlying baselearner model matrices
boosted.rq$baselearner.matrix()
### Selected component for each iteration
boosted.rq$xselect()
c("Intercept", boosted.rq$baselearner.names)[boosted.rq$xselect() + 1]
### Current working residuals (negative gradients)
boosted.rq$neg.gradients()
### Course of empirical risk during the boosting process
boosted.rq$risk()
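To visualise the course of the empirical risk, base R plotting is sufficient (a sketch, assuming risk() returns one value per iteration):

# Plot the in-sample empirical risk against the boosting iteration
plot(boosted.rq$risk(), type = "l",
  xlab = "Boosting iteration", ylab = "Empirical risk")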
### Current fitted values
boosted.rq$fitted()
# or use
fitted(boosted.rq)
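As a rough consistency check (an assumption about the aggregation, not a documented identity): with aggregate = "sum", the aggregated coefficients of each baselearner should enter the fit additively on top of the offset.

# Hypothetical check: rebuild the fitted values from the aggregated coefficients
cf <- coef(boosted.rq, aggregate = "sum")
X1 <- model.matrix(~ cyl * hp, data = mtcars)
X2 <- model.matrix(~ am + wt, data = mtcars)
manual.fit <- as.numeric(cf$offset) +
  drop(X1 %*% cf$`brq(cyl * hp)`) +
  drop(X2 %*% cf$`brq(am + wt)`)
all.equal(unname(manual.fit), unname(as.numeric(fitted(boosted.rq))))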
### Current residuals
boosted.rq$resid()
# or use
residuals(boosted.rq)
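A quick sanity check (assuming resid() returns raw residuals on the response scale):

# Residuals should equal the response minus the current fitted values
all.equal(as.numeric(residuals(boosted.rq)),
  as.numeric(mtcars$mpg - fitted(boosted.rq)))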
### Model predictions
dat.pred <- data.frame(
  cyl = c(4, 6),
  hp = c(90, 134),
  wt = c(3.125, 2.485),
  am = c(1, 0)
)
boosted.rq$predict(newdata = dat.pred, aggregate = "sum") # also try aggregate = "cumsum" or "none"
# or use
predict(boosted.rq, newdata = dat.pred, aggregate = "sum")
### Update the number of iterations without fully refitting the model
### If mstop_new > mstop_current: the fitting process continues from the current number of iterations
### If mstop_new < mstop_current: the model results are subsetted, so the model is not refitted
### Current number of iterations
boosted.rq$mstop()
#> [1] 200
### Update the number of iterations
boosted.rq <- boosted.rq[300]
boosted.rq$mstop()
#> [1] 300
# or use
boosted.rq$subset(400)
boosted.rq$mstop()
#> [1] 400
# or use
mstop(boosted.rq) <- 100
boosted.rq$mstop()
#> [1] 100
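The out-of-sample check loss can be used to choose mstop on a holdout set. The following is only a sketch, not part of the package API: it assumes that predict(..., aggregate = "cumsum") returns one column of cumulative predictions per boosting iteration, and it uses a hand-written check-loss helper matching the loss function printed above.

# Hand-written check (pinball) loss, matching the printed loss function
check.loss <- function(y, f, tau) {
  u <- y - f
  tau * u * (u > 0) + (tau - 1) * u * (u <= 0)
}

set.seed(1)
train <- sample(nrow(mtcars), 24)

brq.train <- boostrq(
  formula = mpg ~ brq(cyl * hp) + brq(am + wt),
  data = mtcars[train, ],
  mstop = 500,
  nu = 0.1,
  tau = 0.5
)

# Assumption: one column of cumulative predictions per boosting iteration
# (verify ncol(pred.path) against mstop(brq.train) before relying on this)
pred.path <- predict(brq.train, newdata = mtcars[-train, ], aggregate = "cumsum")
oos.risk <- apply(pred.path, 2, function(f) mean(check.loss(mtcars$mpg[-train], f, tau = 0.5)))

which.min(oos.risk) # candidate number of iterations
mstop(brq.train) <- which.min(oos.risk)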