`include_reference = TRUE` erroneously works with `datawizard::contr.deviation()` #962

mattansb · 2024-04-10T05:39:26Z

include_reference = TRUE should only add the referent when standard dummy coding is used, but for some reason it also adds a (wrong) 0 when datawizard::contr.deviation() is used.

contrs <- list(
  # makes sense:
  treatment = contr.treatment,
  SAS = contr.SAS,
  
  # doens't make sense:
  deviation = datawizard::contr.deviation,
  sum = contr.sum,
  helmert = contr.helmert,
  poly = contr.poly,
  equalprior = bayestestR::contr.equalprior
)

data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)

lapply(contrs, function(.c) {
  lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
}) |> 
  parameters::compare_parameters(include_reference = TRUE, select = "est")
#> Parameter    | treatment |   SAS | deviation |   sum | helmert |  poly | equalprior
#> -----------------------------------------------------------------------------------
#> (Intercept)  |     26.66 | 15.10 |     20.50 | 20.50 |   20.50 | 20.50 |      20.50
#> cyl (4)      |      0.00 | 11.56 |      0.00 |       |         |       |           
#> cyl (6)      |     -6.92 |  4.64 |     -6.92 |       |         |       |           
#> cyl (8)      |    -11.56 |  0.00 |    -11.56 |       |         |       |           
#> cyl (1)      |           |       |           |  6.16 |   -3.46 |       |      -3.28
#> cyl (2)      |           |       |           | -0.76 |   -2.70 |       |       7.55
#> cyl (.L)     |           |       |           |       |         | -8.18 |           
#> cyl (.Q)     |           |       |           |       |         |  0.93 |           
#> -----------------------------------------------------------------------------------
#> Observations |        32 |    32 |        32 |    32 |      32 |    32 |         32

The text was updated successfully, but these errors were encountered:

strengejacke · 2024-04-10T06:07:13Z

Is there a safe way to find out which contrasts were used?

contrs <- list(
  # makes sense:
  treatment = contr.treatment,
  SAS = contr.SAS,
  
  # doens't make sense:
  deviation = datawizard::contr.deviation,
  sum = contr.sum,
  helmert = contr.helmert,
  poly = contr.poly,
  equalprior = bayestestR::contr.equalprior
)

data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)

models <- lapply(contrs, function(.c) {
  lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
})

lapply(models, function(i) i$contrasts)
#> $treatment
#> $treatment$cyl
#>   6 8
#> 4 0 0
#> 6 1 0
#> 8 0 1
#> 
#> 
#> $SAS
#> $SAS$cyl
#>   4 6
#> 4 1 0
#> 6 0 1
#> 8 0 0
#> 
#> 
#> $deviation
#> $deviation$cyl
#>            6          8
#> 4 -0.3333333 -0.3333333
#> 6  0.6666667 -0.3333333
#> 8 -0.3333333  0.6666667
#> 
#> 
#> $sum
#> $sum$cyl
#>   [,1] [,2]
#> 4    1    0
#> 6    0    1
#> 8   -1   -1
#> 
#> 
#> $helmert
#> $helmert$cyl
#>   [,1] [,2]
#> 4   -1   -1
#> 6    1   -1
#> 8    0    2
#> 
#> 
#> $poly
#> $poly$cyl
#>              .L         .Q
#> 4 -7.071068e-01  0.4082483
#> 6 -7.850462e-17 -0.8164966
#> 8  7.071068e-01  0.4082483
#> 
#> 
#> $equalprior
#> $equalprior$cyl
#>         [,1]       [,2]
#> 4  0.0000000  0.8164966
#> 6 -0.7071068 -0.4082483
#> 8  0.7071068 -0.4082483

^{Created on 2024-04-10 with reprex v2.1.0}

mattansb · 2024-04-10T08:20:14Z

Is there a safe way to find out which contrasts were used?

No, I don't think so, but it should only be done if the contrast matrix has a row of all 0s.

contrs <- list(
  # makes sense:
  treatment = contr.treatment,
  SAS = contr.SAS,
  
  # doens't make sense:
  deviation = datawizard::contr.deviation,
  sum = contr.sum,
  helmert = contr.helmert,
  poly = contr.poly,
  equalprior = bayestestR::contr.equalprior
)

data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)

models <- lapply(contrs, function(.c) {
  lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
})

all_zero_row <- function(m) {
  apply(m == 0, 1, all)
}

has_zeros_row <- function(m) {
  any(all_zero_row(m))
}

lapply(models, function(i) {
  lapply(i$contrasts, has_zeros_row)
})
#> $treatment
#> $treatment$cyl
#> [1] TRUE
#> 
#> 
#> $SAS
#> $SAS$cyl
#> [1] TRUE
#> 
#> 
#> $deviation
#> $deviation$cyl
#> [1] FALSE
#> 
#> 
#> $sum
#> $sum$cyl
#> [1] FALSE
#> 
#> 
#> $helmert
#> $helmert$cyl
#> [1] FALSE
#> 
#> 
#> $poly
#> $poly$cyl
#> [1] FALSE
#> 
#> 
#> $equalprior
#> $equalprior$cyl
#> [1] FALSE

^{Created on 2024-04-10 with reprex v2.1.0}

…deviation()` Fixes #962

strengejacke · 2024-04-26T06:51:55Z

library(parameters)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)

m <- lm(mpg ~ cyl + gear, data = mtcars, contrasts = list(cyl = datawizard::contr.deviation))
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
#> -------------------------------------------------------------------
#> (Intercept) |       19.70 | 1.18 | [ 17.28, 22.11] | 16.71 | < .001
#> cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [3]    |        0.00 |      |                 |       |       
#> gear [4]    |        1.32 | 1.93 | [ -2.63,  5.28] |  0.69 | 0.498 
#> gear [5]    |        1.50 | 1.85 | [ -2.31,  5.31] |  0.81 | 0.426
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(mpg ~ cyl + gear, data = mtcars)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
#> -------------------------------------------------------------------
#> (Intercept) |       25.43 | 1.88 | [ 21.57, 29.29] | 13.52 | < .001
#> cyl [4]     |        0.00 |      |                 |       |       
#> cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [3]    |        0.00 |      |                 |       |       
#> gear [4]    |        1.32 | 1.93 | [ -2.63,  5.28] |  0.69 | 0.498 
#> gear [5]    |        1.50 | 1.85 | [ -2.31,  5.31] |  0.81 | 0.426
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(
    cyl = datawizard::contr.deviation,
    gear = contr.sum
  )
)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
#> -------------------------------------------------------------------
#> (Intercept) |       20.64 | 0.67 | [ 19.26, 22.01] | 30.76 | < .001
#> cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [1]    |       -0.94 | 1.09 | [ -3.18,  1.30] | -0.86 | 0.396 
#> gear [2]    |        0.38 | 1.11 | [ -1.90,  2.67] |  0.34 | 0.734
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(
    cyl = contr.SAS,
    gear = contr.sum
  )
)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |         95% CI | t(27) |      p
#> ------------------------------------------------------------------
#> (Intercept) |       15.83 | 1.24 | [13.28, 18.37] | 12.75 | < .001
#> cyl [8]     |        0.00 |      |                |       |       
#> cyl [4]     |       10.54 | 1.96 | [ 6.52, 14.56] |  5.38 | < .001
#> cyl [6]     |        3.89 | 1.88 | [ 0.03,  7.75] |  2.07 | 0.049 
#> gear [1]    |       -0.94 | 1.09 | [-3.18,  1.30] | -0.86 | 0.396 
#> gear [2]    |        0.38 | 1.11 | [-1.90,  2.67] |  0.34 | 0.734
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(
    cyl = contr.SAS,
    gear = contr.treatment
  )
)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |         95% CI | t(27) |      p
#> ------------------------------------------------------------------
#> (Intercept) |       14.89 | 0.92 | [13.00, 16.77] | 16.19 | < .001
#> cyl [8]     |        0.00 |      |                |       |       
#> cyl [4]     |       10.54 | 1.96 | [ 6.52, 14.56] |  5.38 | < .001
#> cyl [6]     |        3.89 | 1.88 | [ 0.03,  7.75] |  2.07 | 0.049 
#> gear [3]    |        0.00 |      |                |       |       
#> gear [4]    |        1.32 | 1.93 | [-2.63,  5.28] |  0.69 | 0.498 
#> gear [5]    |        1.50 | 1.85 | [-2.31,  5.31] |  0.81 | 0.426
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

^{Created on 2024-04-26 with reprex v2.1.0}

strengejacke · 2024-04-26T06:54:40Z

This currently works for those model objects that have stored their contrasts as model$contrasts. We need to make this more robust for other models, too - or does every package with modelling functions save their contrasts like this?

strengejacke · 2024-04-26T06:59:52Z

glmmTMB saves it as function...

library(glmmTMB)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)

m <- glmmTMB(mpg ~ cyl + (1 | gear), data = mtcars, contrasts = list(cyl = contr.sum))
m$modelInfo$contrasts
#> $cyl
#> function (n, contrasts = TRUE, sparse = FALSE) 
#> {
#>     if (length(n) <= 1L) {
#>         if (is.numeric(n) && length(n) == 1L && n > 1L) 
#>             levels <- seq_len(n)
#>         else stop("not enough degrees of freedom to define contrasts")
#>     }
#>     else levels <- n
#>     levels <- as.character(levels)
#>     cont <- .Diag(levels, sparse = sparse)
#>     if (contrasts) {
#>         cont <- cont[, -length(levels), drop = FALSE]
#>         cont[length(levels), ] <- -1
#>         colnames(cont) <- NULL
#>     }
#>     cont
#> }
#> <bytecode: 0x000002727afd2770>
#> <environment: namespace:stats>

^{Created on 2024-04-26 with reprex v2.1.0}

mattansb · 2024-04-30T20:24:20Z

You can apply those functions (with some n>2) and then run the test:

all_zero_row <- function(m) {
  apply(m == 0, 1, all)
}

has_zeros_row <- function(m) {
  any(all_zero_row(m))
}


library(glmmTMB)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)

m <- glmmTMB(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(cyl = contr.sum, gear = contr.treatment)
)

glmmtmb_contr_foos <- m$modelInfo$contrasts

glmmtmb_contr_foos |> 
  lapply(\(f) f(3)) |> 
  lapply(has_zeros_row)
#> $cyl
#> [1] FALSE
#> 
#> $gear
#> [1] TRUE
#>

mattansb added the bug 🐛 Something isn't working label Apr 10, 2024

strengejacke self-assigned this Apr 26, 2024

strengejacke added a commit that referenced this issue Apr 26, 2024

include_reference = TRUE erroneously works with `datawizard::contr.…

f1ceaa7

…deviation()` Fixes #962

strengejacke linked a pull request Apr 26, 2024 that will close this issue

include_reference = TRUE erroneously works with datawizard::contr.deviation() #966

Open

strengejacke added the help us 👀 Help is needed to implement something label Apr 28, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

`include_reference = TRUE` erroneously works with `datawizard::contr.deviation()` #962

`include_reference = TRUE` erroneously works with `datawizard::contr.deviation()` #962

mattansb commented Apr 10, 2024

strengejacke commented Apr 10, 2024

mattansb commented Apr 10, 2024

strengejacke commented Apr 26, 2024

strengejacke commented Apr 26, 2024

strengejacke commented Apr 26, 2024

mattansb commented Apr 30, 2024

include_reference = TRUE erroneously works with datawizard::contr.deviation() #962

include_reference = TRUE erroneously works with datawizard::contr.deviation() #962

Comments

mattansb commented Apr 10, 2024

strengejacke commented Apr 10, 2024

mattansb commented Apr 10, 2024

strengejacke commented Apr 26, 2024

strengejacke commented Apr 26, 2024

strengejacke commented Apr 26, 2024

mattansb commented Apr 30, 2024

`include_reference = TRUE` erroneously works with `datawizard::contr.deviation()` #962

`include_reference = TRUE` erroneously works with `datawizard::contr.deviation()` #962