tinyVAST
is an R package for fitting vector
autoregressive spatio-temporal (VAST) models. We here explore the
capacity to specify a spatial factor analysis, where the spatial pattern
for multiple variables is described via their estimated association with
a small number of spatial latent variables.
We first explore the ability to specify two latent variables for five manifest variables. To start we simulate two spatial latent variables, project via a simulated loadings matrix, and then simulate a Tweedie response for each manifest variable:
# Simulate settings
theta_xy = 0.4
n_x = n_y = 10
n_c = 5
rho = 0.8
resid_sd = 0.5
# Simulate GMRFs
R_s = exp(-theta_xy * abs(outer(1:n_x, 1:n_y, FUN="-")) )
R_ss = kronecker(X=R_s, Y=R_s)
delta_fs = mvtnorm::rmvnorm(n_c, sigma=R_ss )
#
L_cf = matrix( rnorm(n_c^2), nrow=n_c )
L_cf[,3:5] = 0
L_cf = L_cf + resid_sd * diag(n_c)
#
d_cs = L_cf %*% delta_fs
Where we can inspect the simulated loadings matrix
dimnames(L_cf) = list( paste0("Var ", 1:nrow(L_cf)),
paste0("Factor ", 1:ncol(L_cf)) )
knitr::kable( L_cf,
digits=2, caption="True loadings")
Factor 1 | Factor 2 | Factor 3 | Factor 4 | Factor 5 | |
---|---|---|---|---|---|
Var 1 | -0.77 | 0.26 | 0.0 | 0.0 | 0.0 |
Var 2 | -0.66 | 1.03 | 0.0 | 0.0 | 0.0 |
Var 3 | -0.30 | -0.54 | 0.5 | 0.0 | 0.0 |
Var 4 | -0.57 | 0.34 | 0.0 | 0.5 | 0.0 |
Var 5 | -0.03 | -0.24 | 0.0 | 0.0 | 0.5 |
We then specify the model as expected by tinyVAST:
# Shape into longform data-frame and add error
Data = data.frame( expand.grid(species=1:n_c, x=1:n_x, y=1:n_y),
"var"="logn", "z"=exp(as.vector(d_cs)) )
Data$n = tweedie::rtweedie( n=nrow(Data), mu=Data$z, phi=0.5, power=1.5 )
mean(Data$n==0)
#> [1] 0.03
# make mesh
mesh = fm_mesh_2d( Data[,c('x','y')] )
#
sem = "
f1 -> 1, l1
f1 -> 2, l2
f1 -> 3, l3
f1 -> 4, l4
f1 -> 5, l5
f2 -> 2, l6
f2 -> 3, l7
f2 -> 4, l8
f2 -> 5, l9
f1 <-> f1, NA, 1
f2 <-> f2, NA, 1
1 <-> 1, NA, 0
2 <-> 2, NA, 0
3 <-> 3, NA, 0
4 <-> 4, NA, 0
5 <-> 5, NA, 0
"
# fit model
out = tinyVAST( space_term = sem,
data = Data,
formula = n ~ 0 + factor(species),
spatial_domain = mesh,
family = tweedie(),
variables = c( "f1", "f2", 1:n_c ),
space_columns = c("x","y"),
variable_column = "species",
time_column = "time",
distribution_column = "dist",
control = tinyVASTcontrol(gmrf="proj") )
out
#> Call:
#> tinyVAST(formula = n ~ 0 + factor(species), data = Data, space_term = sem,
#> family = tweedie(), space_columns = c("x", "y"), spatial_domain = mesh,
#> time_column = "time", variable_column = "species", variables = c("f1",
#> "f2", 1:n_c), distribution_column = "dist", control = tinyVASTcontrol(gmrf = "proj"))
#>
#> Run time:
#> Time difference of 2.807202 secs
#>
#> Family:
#> $obs
#>
#> Family: tweedie
#> Link function: log
#>
#>
#>
#>
#> sdreport(.) result
#> Estimate Std. Error
#> alpha_j 0.07570783 0.31850774
#> alpha_j -0.02014888 0.39763412
#> alpha_j 0.22318277 0.21847161
#> alpha_j 0.14728087 0.27057852
#> alpha_j -0.26514652 0.14638317
#> theta_z 0.68016356 0.11510781
#> theta_z 0.68285927 0.15773444
#> theta_z 0.31701846 0.10358197
#> theta_z 0.52123769 0.10914344
#> theta_z 0.14819781 0.09200614
#> theta_z 0.51873790 0.13709521
#> theta_z -0.31998633 0.10049164
#> theta_z 0.23601680 0.10640963
#> theta_z -0.21613586 0.09652980
#> log_sigma -0.52205331 0.06761637
#> log_sigma 0.21851154 0.13313019
#> log_kappa -0.26761196 0.21030826
#> Maximum gradient component: 0.002233932
#>
#> Proportion conditional deviance explained:
#> [1] 0.551999
#>
#> space_term:
#> heads to from parameter start Estimate Std_Error z_value p_value
#> 1 1 1 f1 1 <NA> 0.6801636 0.11510781 5.908926 3.443444e-09
#> 2 1 2 f1 2 <NA> 0.6828593 0.15773444 4.329170 1.496721e-05
#> 3 1 3 f1 3 <NA> 0.3170185 0.10358197 3.060556 2.209261e-03
#> 4 1 4 f1 4 <NA> 0.5212377 0.10914344 4.775713 1.790719e-06
#> 5 1 5 f1 5 <NA> 0.1481978 0.09200614 1.610738 1.072368e-01
#> 6 1 2 f2 6 <NA> 0.5187379 0.13709521 3.783778 1.544654e-04
#> 7 1 3 f2 7 <NA> -0.3199863 0.10049164 -3.184209 1.451504e-03
#> 8 1 4 f2 8 <NA> 0.2360168 0.10640963 2.218002 2.655468e-02
#> 9 1 5 f2 9 <NA> -0.2161359 0.09652980 -2.239058 2.515211e-02
#> 10 2 f1 f1 0 1 1.0000000 NA NA NA
#> 11 2 f2 f2 0 1 1.0000000 NA NA NA
#> 12 2 1 1 0 0 0.0000000 NA NA NA
#> 13 2 2 2 0 0 0.0000000 NA NA NA
#> 14 2 3 3 0 0 0.0000000 NA NA NA
#> 15 2 4 4 0 0 0.0000000 NA NA NA
#> 16 2 5 5 0 0 0.0000000 NA NA NA
#>
#> Fixed terms:
#> Estimate Std_Error z_value p_value
#> factor(species)1 0.07570783 0.3185077 0.23769543 0.81211733
#> factor(species)2 -0.02014888 0.3976341 -0.05067191 0.95958696
#> factor(species)3 0.22318277 0.2184716 1.02156419 0.30698721
#> factor(species)4 0.14728087 0.2705785 0.54431841 0.58622238
#> factor(species)5 -0.26514652 0.1463832 -1.81131833 0.07009159
We can compare the true loadings (rotated to optimize comparison):
Lrot_cf = rotate_pca( L_cf )$L_tf
dimnames(Lrot_cf) = list( paste0("Var ", 1:nrow(Lrot_cf)),
paste0("Factor ", 1:ncol(Lrot_cf)) )
knitr::kable( Lrot_cf,
digits=2, caption="Rotated true loadings")
Factor 1 | Factor 2 | Factor 3 | Factor 4 | Factor 5 | |
---|---|---|---|---|---|
Var 1 | -0.71 | 0.34 | 0.00 | -0.11 | -0.19 |
Var 2 | -1.20 | -0.18 | 0.00 | -0.18 | 0.12 |
Var 3 | 0.22 | 0.73 | -0.17 | -0.09 | 0.10 |
Var 4 | -0.70 | 0.25 | 0.06 | 0.37 | 0.03 |
Var 5 | 0.17 | 0.23 | 0.47 | -0.08 | 0.03 |
with the estimated loadings
# Extract and rotate estimated loadings
Lhat_cf = matrix( 0, nrow=n_c, ncol=2 )
Lhat_cf[lower.tri(Lhat_cf,diag=TRUE)] = as.list(out$sdrep, what="Estimate")$theta_z
Lhat_cf = rotate_pca( L_tf=Lhat_cf, order="decreasing" )$L_tf
#> Warning in sqrt(Eigen$values): NaNs produced
Where we can compared the estimated and true loadings matrices:
dimnames(Lhat_cf) = list( paste0("Var ", 1:nrow(Lhat_cf)),
paste0("Factor ", 1:ncol(Lhat_cf)) )
knitr::kable( Lhat_cf,
digits=2, caption="Rotated estimated loadings" )
Factor 1 | Factor 2 | |
---|---|---|
Var 1 | 0.64 | -0.23 |
Var 2 | 0.82 | 0.26 |
Var 3 | 0.19 | -0.41 |
Var 4 | 0.57 | 0.05 |
Var 5 | 0.07 | -0.25 |
Or we can specify the model while ensuring that residual spatial variation is also captured:
#
sem = "
f1 -> 1, l1
f1 -> 2, l2
f1 -> 3, l3
f1 -> 4, l4
f1 -> 5, l5
f2 -> 2, l6
f2 -> 3, l7
f2 -> 4, l8
f2 -> 5, l9
f1 <-> f1, NA, 1
f2 <-> f2, NA, 1
1 <-> 1, sd_resid
2 <-> 2, sd_resid
3 <-> 3, sd_resid
4 <-> 4, sd_resid
5 <-> 5, sd_resid
"
# fit model
out = tinyVAST( space_term = sem,
data = Data,
formula = n ~ 0 + factor(species),
spatial_domain = mesh,
family = list( "obs"=tweedie() ),
variables = c( "f1", "f2", 1:n_c ),
space_columns = c("x","y"),
variable_column = "species",
time_column = "time",
distribution_column = "dist",
control = tinyVASTcontrol(gmrf="proj") )
# Extract and rotate estimated loadings
Lhat_cf = matrix( 0, nrow=n_c, ncol=2 )
Lhat_cf[lower.tri(Lhat_cf,diag=TRUE)] = as.list(out$sdrep, what="Estimate")$theta_z
#> Warning in Lhat_cf[lower.tri(Lhat_cf, diag = TRUE)] = as.list(out$sdrep, :
#> number of items to replace is not a multiple of replacement length
Lhat_cf = rotate_pca( L_tf=Lhat_cf, order="decreasing" )$L_tf
#> Warning in sqrt(Eigen$values): NaNs produced
Where we can again compared the estimated and true loadings matrices:
dimnames(Lhat_cf) = list( paste0("Var ", 1:nrow(Lhat_cf)),
paste0("Factor ", 1:ncol(Lhat_cf)) )
knitr::kable( Lhat_cf,
digits=2, caption="Rotated estimated loadings with full rank" )
Factor 1 | Factor 2 | |
---|---|---|
Var 1 | 0.69 | -0.18 |
Var 2 | 0.74 | 0.23 |
Var 3 | 0.07 | -0.42 |
Var 4 | 0.47 | -0.02 |
Var 5 | 0.07 | -0.07 |