Resolved all issues raised by the CRAN manual review to meet publication requirements, including:
Increased the number of markers (which was decreased from 10k to 1k to minimize package size and running time) in the example dataset from 1k to 1.1k to ensure the number of markers exceeds the number of subjects.
Moved inst/docker back to the project root and excluded it from the package build.
Add a new method, WtCoxG, and bump version to 0.2.0.
We reorganized the Depends
, Imports
,
Suggests
and LinkingTo
fields in the
DESCRIPTION
file for clarity and correctness. Additionally,
we consolidated Makevars
and Makevars.win
into
a single Makevars
file for simplified build
configuration.
In previous versions, GRAB automatically imported all functions and objects from its dependent packages and exported all package objects to the user’s workspace. We have updated the NAMESPACE file so that GRAB now only imports specific functions as needed from dependencies and only exports user-facing functions. This results in a cleaner namespace and avoids cluttering the user’s environment with unnecessary objects.
We no longer include a copy of GCTA program within the package.
Instead, users have to specify the path of the GCTA executable on their
system using gcta64File
parameter when calling
getTempFilesFullGRM()
. We have successfully tested
getTempFilesFullGRM() with GCTA v1.94.4 on Windows. Despite previous
documentation indicating Linux-only support, this function works on
Windows systems as well.
We have tested and debugged all examples included in the function
help files, and verified that they execute correctly. We have minimized
the package size and example running time. Additionally, we have made
all necessary changes to ensure the package complies with CRAN policies
and requirements, passing R CMD check --as-cran
with no
ERRORs or WARNINGs, and as few NOTEs as possible (one left: one possibly
misspelled word, biobank, in DESCRIPTION).
The old version 0.1.2 can be found in branch release/v0.1.2. This branch will serve as an archive and will no longer be actively maintained.
We make a website for GRAB package: https://wenjianbi.github.io/grab.github.io/. Please check it for more recent update.
Rtools (https://cran.rstudio.com/bin/windows/Rtools/) should be installed before installing this package.
library(remotes) # remotes library requires less dependency packages than devtools
install_github("GeneticAnalysisinBiobanks/GRAB", INSTALL_opts=c("--no-multiarch"), ref="main") # The INSTALL_opts is required in Windows OS.
library(GRAB)
## Commen Variants with MAF ranging from (0.05, 0.5)
set.seed(12345)
= GRAB.SimuGMat(nSub = 500, nFam = 50, FamMode = "10-members", nSNP = 10000,
OutList MaxMAF = 0.5, MinMAF = 0.05)
= OutList$GenoMat
GenoMat = 0.05
MissingRate = sample(length(GenoMat), MissingRate * length(GenoMat))
indexMissing = -9
GenoMat[indexMissing] = system.file("extdata", package = "GRAB")
extDir = paste0(extDir, "/simuPLINK")
extPrefix GRAB.makePlink(GenoMat, extPrefix)
setwd(extDir)
system("plink --file simuPLINK --make-bed --out simuPLINK")
system("plink --bfile simuPLINK --recode A --out simuRAW")
system("plink2 --bfile simuPLINK --export bgen-1.2 bits=8 ref-first --out simuBGEN # UK Biobank use 'ref-first'")
system("bgenix -g simuBGEN.bgen -index")
## Rare Variants with MAF ranging from (0.0001, 0.01)
set.seed(34567)
= GRAB.SimuGMat(nSub = 500, nFam = 50, FamMode = "10-members", nSNP = 10000,
OutList MaxMAF = 0.01, MinMAF = 0.0001)
= OutList$GenoMat
GenoMat = 0.05
MissingRate = sample(length(GenoMat), MissingRate * length(GenoMat))
indexMissing = -9
GenoMat[indexMissing] = system.file("extdata", package = "GRAB")
extDir = paste0(extDir, "/simuPLINK_RV")
extPrefix GRAB.makePlink(GenoMat, extPrefix)
setwd(extDir)
system("plink --file simuPLINK_RV --make-bed --out simuPLINK_RV")
system("plink --bfile simuPLINK_RV --recode A --out simuRAW_RV")
system("plink2 --bfile simuPLINK_RV --export bgen-1.2 bits=8 ref-first --out simuBGEN_RV # UK Biobank use 'ref-first'")
system("bgenix -g simuBGEN_RV.bgen -index")
= system.file("extdata", "simuPLINK.bed", package = "GRAB")
GenoFile = tools::file_path_sans_ext(GenoFile) # remove file extension
PlinkFile = 2
nPartsGRM for(partParallel in 1:nPartsGRM){
getTempFilesFullGRM(PlinkFile, nPartsGRM, partParallel) # this function only supports Linux
}= system.file("SparseGRM", "SparseGRM.txt", package = "GRAB")
SparseGRMFile getSparseGRM(PlinkFile, nPartsGRM, SparseGRMFile)
::fread(SparseGRMFile)
data.table# ID1 ID2 Value
# 1: f1_1 f1_1 0.9591102
# 2: f1_2 f1_2 1.0227757
# 3: f1_3 f1_3 1.0278691
# 4: f1_4 f1_4 1.0138606
# 5: f1_5 f1_1 0.4640743
# ---
# 2546: Subj-496 Subj-496 1.0031059
# 2547: Subj-497 Subj-497 0.9952855
# 2548: Subj-498 Subj-498 0.9829549
# 2549: Subj-499 Subj-499 1.0086861
# 2550: Subj-500 Subj-500 0.9786987
= system.file("extdata", "simuPLINK.fam", package = "GRAB")
FamFile = read.table(FamFile)
FamData = FamData$V2 # Individual ID
IID = length(IID)
n set.seed(678910)
## The below is just to demonstrate the functions of GRAB package
= data.frame(IID = IID, Cova1 = rnorm(n), Cova2 = rbinom(n, 1, 0.5),
Pheno binary = rbinom(n, 1, 0.5),
ordinal = rbinom(n, 3, 0.3),
quantitative = rnorm(n),
time = runif(n),
event = rbinom(n, 1, 0.2))
= system.file("extdata", "simuPHENO.txt", package = "GRAB")
PhenoFile write.table(Pheno, PhenoFile, row.names = FALSE, quote = F, sep = "\t")
set.seed(101112)
= data.frame(REGION = paste0("Region_", rep(1:100,each=100)),
RegionData MARKER = paste0("SNP_",1:10000),
ANNO1 = rbinom(10000, 1, 0.5),
ANNO2 = runif(10000))
= system.file("extdata", "simuPLINK.bed", package = "GRAB")
GenoFile # GenoFile = system.file("extdata", "simuBGEN.bgen", package = "GRAB") # BGEN file input is also supported in null model fitting
= system.file("extdata", "simuPHENO.txt", package = "GRAB")
PhenoFile = read.table(PhenoFile, header = T)
Pheno = system.file("SparseGRM", "SparseGRM.txt", package = "GRAB")
SparseGRMFile = system.file("results", "objNull.RData", package = "GRAB")
objNullFile
= GRAB.NullModel(as.factor(ordinal) ~ Cova1 + Cova2,
objNull data = Pheno,
subset = (event==0),
subjData = Pheno$IID,
method = "POLMM",
traitType = "ordinal",
GenoFile = GenoFile,
SparseGRMFile = SparseGRMFile)
$tau # 0.5655751
objNull
save(objNull, file = objNullFile)
= system.file("results", "objNull.RData", package = "GRAB")
objNullFile load(objNullFile)
= system.file("results", package = "GRAB")
OutputDir = paste0(OutputDir, "/simuMarkerOutput.txt")
OutputFile = system.file("extdata", "simuPLINK.bed", package = "GRAB")
GenoFile
# If 'OutputFile' and 'OutputFileIndex' have existed, users might need to remove them first.
GRAB.Marker(objNull,
GenoFile = GenoFile,
OutputFile = OutputFile)
## Check 'OutputFile' for the analysis results
::fread(OutputFile)
data.table# Marker Info AltFreq AltCounts MissingRate Pvalue beta seBeta
# 1: SNP_1 1:1:G:A 0.3934316 587 0.04481434 0.77381083 0.03200175 0.1113516
# 2: SNP_2 1:2:G:A 0.4561995 677 0.04993598 0.30789980 -0.10816640 -0.1060831
# 3: SNP_3 1:3:G:A 0.3899329 581 0.04609475 0.77397169 0.03153778 0.1098174
# 4: SNP_4 1:4:G:A 0.4433834 650 0.06145967 0.33178713 0.10265438 0.1057725
# 5: SNP_5 1:5:G:A 0.2355705 351 0.04609475 0.59348431 0.06879171 0.1288732
# ---
# 9996: SNP_9996 1:9996:G:A 0.3424566 513 0.04097311 0.04678621 -0.21914094 -0.1102191
# 9997: SNP_9997 1:9997:G:A 0.2686062 397 0.05377721 0.84223235 -0.02476919 -0.1244440
# 9998: SNP_9998 1:9998:G:A 0.0669344 100 0.04353393 0.75590206 -0.06695376 -0.2153778
# 9999: SNP_9999 1:9999:G:A 0.2377384 349 0.06017926 0.45544541 -0.09688906 -0.1298141
# 10000: SNP_10000 1:10000:G:A 0.1361186 202 0.04993598 0.37979954 -0.13401746 -0.1525933
# The below is to show some parameters in control list. For more details, please check the Details section in ?GRAB.Marker
GRAB.Marker(objNull,
GenoFile = GenoFile,
OutputFile = OutputFile,
control = list(nMarkersEachChunk = 1000,
ImputeMethod = "mean",
MissingRateCutoff = 0.05,
MinMAFCutoff = 0.1))
= system.file("results", "objNull.RData", package = "GRAB")
objNullFile load(objNullFile)
= system.file("results", package = "GRAB")
OutputDir = paste0(OutputDir, "/simuRegionOutput.txt")
OutputFile = system.file("extdata", "simuPLINK_RV.bed", package = "GRAB")
GenoFile = system.file("extdata", "simuRegion.txt", package = "GRAB")
RegionFile = system.file("SparseGRM", "SparseGRM.txt", package = "GRAB")
SparseGRMFile
## make sure the output files does not exist at first
file.remove(OutputFile)
file.remove(paste0(OutputFile, ".markerInfo"))
file.remove(paste0(OutputFile, ".index"))
GRAB.Region(objNull,
GenoFile = GenoFile,
OutputFile = OutputFile,
RegionFile = RegionFile,
SparseGRMFile = SparseGRMFile)
::fread(OutputFile)
data.table
## additional columns of "zScore", "nSamplesInGroup", "AltCountsInGroup", "AltFreqInGroup"
## We do not recommend adding too many columns for all markers
file.remove(OutputFile)
file.remove(paste0(OutputFile, ".markerInfo"))
file.remove(paste0(OutputFile, ".index"))
GRAB.Region(objNull,
GenoFile = GenoFile,
OutputFile = OutputFile,
RegionFile = RegionFile,
RegionAnnoHeader = c("ANNO1", "ANNO2"),
SparseGRMFile = SparseGRMFile,
control = list(outputColumns = c("beta", "seBeta", "zScore","nSamplesInGroup","AltCountsInGroup","AltFreqInGroup")))
::fread(OutputFile)
data.table
## Check 'OutputFile' for the analysis results
The current version of GRAB package only supports BGEN v1.2 using 8 bits compression (faster than using 16 bits). For example, if plink2 is used to make BGEN file, please refer to https://www.cog-genomics.org/plink/2.0/data#export
plink2 --bfile input --out output --export bgen-1.2 bits=8
The index file for BGEN file is also required (filename extension is “.bgen.bgi”). Please refer to https://enkre.net/cgi-bin/code/bgen/wiki/bgenix
For SPACox method, we should create the following files
/src/SPACox.cpp
/src/SPACox.hpp
/R/SPACox.R
and modify the following codes
/src/Main.cpp
# static SPACox::SPACoxClass* ptr_gSPACoxobj = NULL;
# void setSPACoxobjInCPP()
/R/GRAB_Null_Model.R
/R/GRAB_Marker.R
/R/GRAB_Region.R