Test for association between a set of SNPs/genes and continuous or binary outcomes by including variant characteristic information and using (weighted) score statistics.

Note:

Installation

# Install MiSTr from CRAN:
install.packages("MiSTr")

# Or the development version from GitHub:
# install.packages("remotes")
remotes::install_github("mcanouil/MiSTr")

MiSTr in Action

library(MiSTr)
data(mist_data)
attach(mist_data)

Continuous Outcome

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1 -0.467 0.284 -1.031   0.097
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 3.4e-05
#>   + PI (mean effect):  
#>     * Score = 2.659
#>     * P-value = 0.103
#>   + TAU (heterogeneous effect):  
#>     * Score = 1464.924
#>     * P-value = 2.37e-05

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1   0.77 0.627 -0.475   2.014
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.531
#>   + PI (mean effect):  
#>     * Score = 1.5
#>     * P-value = 0.221
#>   + TAU (heterogeneous effect):  
#>     * Score = 0.217
#>     * P-value = 0.932

With “Average” Effect (π)

mist(
  y = phenotypes[, "y_pi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1  0.809 0.294  0.225   1.392
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0376
#>   + PI (mean effect):  
#>     * Score = 7.083
#>     * P-value = 0.00778
#>   + TAU (heterogeneous effect):  
#>     * Score = 185.198
#>     * P-value = 0.793

With “Average” Effect (π) and Heterogeneity (τ)

mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1  0.248 0.321 -0.389   0.885
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0307
#>   + PI (mean effect):  
#>     * Score = 0.601
#>     * P-value = 0.438
#>   + TAU (heterogeneous effect):  
#>     * Score = 1006.125
#>     * P-value = 0.0111

With “Average” Effect (π) and With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1  1.517 0.689  0.149   2.884
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.121
#>   + PI (mean effect):  
#>     * Score = 4.661
#>     * P-value = 0.0308
#>   + TAU (heterogeneous effect):  
#>     * Score = 1.427
#>     * P-value = 0.842

Continuous Outcome with Weights

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1 -0.467 0.284 -1.031   0.097
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.000115
#>   + PI (mean effect):  
#>     * Score = 2.659
#>     * P-value = 0.103
#>   + TAU (heterogeneous effect):  
#>     * Score = 171103.8
#>     * P-value = 8.87e-05

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[get_same_effect, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1   0.77 0.627 -0.475   2.014
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.531
#>   + PI (mean effect):  
#>     * Score = 1.5
#>     * P-value = 0.221
#>   + TAU (heterogeneous effect):  
#>     * Score = 25.306
#>     * P-value = 0.932

With “Average” Effect (π)

mist(
  y = phenotypes[, "y_pi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1  0.809 0.294  0.225   1.392
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0383
#>   + PI (mean effect):  
#>     * Score = 7.083
#>     * P-value = 0.00778
#>   + TAU (heterogeneous effect):  
#>     * Score = 23859.6
#>     * P-value = 0.81

With “Average” Effect (π) and Heterogeneity (τ)

mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1  0.248 0.321 -0.389   0.885
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0527
#>   + PI (mean effect):  
#>     * Score = 0.601
#>     * P-value = 0.438
#>   + TAU (heterogeneous effect):  
#>     * Score = 118494.8
#>     * P-value = 0.0212

With “Average” Effect (π) and With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[get_same_effect, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5
#> 1    cluster1  1.517 0.689  0.149   2.884
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.121
#>   + PI (mean effect):  
#>     * Score = 4.661
#>     * P-value = 0.0308
#>   + TAU (heterogeneous effect):  
#>     * Score = 166.235
#>     * P-value = 0.842

Binary Outcome

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "binary"
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5    OR
#> 1    cluster1  1.274 0.344   0.66   2.019 3.576
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 6.54e-05
#>   + PI (mean effect):  
#>     * Score = 17.527
#>     * P-value = 2.83e-05
#>   + TAU (heterogeneous effect):  
#>     * Score = 5.4
#>     * P-value = 0.175

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "binary"
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5    OR
#> 1    cluster1  1.187 0.645  0.062   2.679 3.279
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.154
#>   + PI (mean effect):  
#>     * Score = 3.969
#>     * P-value = 0.0463
#>   + TAU (heterogeneous effect):  
#>     * Score = 0.04
#>     * P-value = 0.766

Binary Outcome with Weights

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "binary",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5    OR
#> 1    cluster1  1.274 0.344   0.66   2.019 3.576
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 9.29e-05
#>   + PI (mean effect):  
#>     * Score = 17.527
#>     * P-value = 2.83e-05
#>   + TAU (heterogeneous effect):  
#>     * Score = 616.283
#>     * P-value = 0.256

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "binary",
  weight.beta = c(1, 25),
  maf = variants_info[get_same_effect, "maf"]
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters Pi_hat    SE CI_2.5 CI_97.5    OR
#> 1    cluster1  1.187 0.645  0.062   2.679 3.279
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.154
#>   + PI (mean effect):  
#>     * Score = 3.969
#>     * P-value = 0.0463
#>   + TAU (heterogeneous effect):  
#>     * Score = 4.691
#>     * P-value = 0.766