Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

include_reference = TRUE erroneously works with datawizard::contr.deviation() #962

Open
mattansb opened this issue Apr 10, 2024 · 6 comments · May be fixed by #966
Open

include_reference = TRUE erroneously works with datawizard::contr.deviation() #962

mattansb opened this issue Apr 10, 2024 · 6 comments · May be fixed by #966
Assignees
Labels
bug 🐛 Something isn't working help us 👀 Help is needed to implement something

Comments

@mattansb
Copy link
Member

include_reference = TRUE should only add the referent when standard dummy coding is used, but for some reason it also adds a (wrong) 0 when datawizard::contr.deviation() is used.

contrs <- list(
  # makes sense:
  treatment = contr.treatment,
  SAS = contr.SAS,
  
  # doens't make sense:
  deviation = datawizard::contr.deviation,
  sum = contr.sum,
  helmert = contr.helmert,
  poly = contr.poly,
  equalprior = bayestestR::contr.equalprior
)

data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)

lapply(contrs, function(.c) {
  lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
}) |> 
  parameters::compare_parameters(include_reference = TRUE, select = "est")
#> Parameter    | treatment |   SAS | deviation |   sum | helmert |  poly | equalprior
#> -----------------------------------------------------------------------------------
#> (Intercept)  |     26.66 | 15.10 |     20.50 | 20.50 |   20.50 | 20.50 |      20.50
#> cyl (4)      |      0.00 | 11.56 |      0.00 |       |         |       |           
#> cyl (6)      |     -6.92 |  4.64 |     -6.92 |       |         |       |           
#> cyl (8)      |    -11.56 |  0.00 |    -11.56 |       |         |       |           
#> cyl (1)      |           |       |           |  6.16 |   -3.46 |       |      -3.28
#> cyl (2)      |           |       |           | -0.76 |   -2.70 |       |       7.55
#> cyl (.L)     |           |       |           |       |         | -8.18 |           
#> cyl (.Q)     |           |       |           |       |         |  0.93 |           
#> -----------------------------------------------------------------------------------
#> Observations |        32 |    32 |        32 |    32 |      32 |    32 |         32
@mattansb mattansb added the bug 🐛 Something isn't working label Apr 10, 2024
@strengejacke
Copy link
Member

Is there a safe way to find out which contrasts were used?

contrs <- list(
  # makes sense:
  treatment = contr.treatment,
  SAS = contr.SAS,
  
  # doens't make sense:
  deviation = datawizard::contr.deviation,
  sum = contr.sum,
  helmert = contr.helmert,
  poly = contr.poly,
  equalprior = bayestestR::contr.equalprior
)

data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)

models <- lapply(contrs, function(.c) {
  lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
})

lapply(models, function(i) i$contrasts)
#> $treatment
#> $treatment$cyl
#>   6 8
#> 4 0 0
#> 6 1 0
#> 8 0 1
#> 
#> 
#> $SAS
#> $SAS$cyl
#>   4 6
#> 4 1 0
#> 6 0 1
#> 8 0 0
#> 
#> 
#> $deviation
#> $deviation$cyl
#>            6          8
#> 4 -0.3333333 -0.3333333
#> 6  0.6666667 -0.3333333
#> 8 -0.3333333  0.6666667
#> 
#> 
#> $sum
#> $sum$cyl
#>   [,1] [,2]
#> 4    1    0
#> 6    0    1
#> 8   -1   -1
#> 
#> 
#> $helmert
#> $helmert$cyl
#>   [,1] [,2]
#> 4   -1   -1
#> 6    1   -1
#> 8    0    2
#> 
#> 
#> $poly
#> $poly$cyl
#>              .L         .Q
#> 4 -7.071068e-01  0.4082483
#> 6 -7.850462e-17 -0.8164966
#> 8  7.071068e-01  0.4082483
#> 
#> 
#> $equalprior
#> $equalprior$cyl
#>         [,1]       [,2]
#> 4  0.0000000  0.8164966
#> 6 -0.7071068 -0.4082483
#> 8  0.7071068 -0.4082483

Created on 2024-04-10 with reprex v2.1.0

@mattansb
Copy link
Member Author

Is there a safe way to find out which contrasts were used?

No, I don't think so, but it should only be done if the contrast matrix has a row of all 0s.

contrs <- list(
  # makes sense:
  treatment = contr.treatment,
  SAS = contr.SAS,
  
  # doens't make sense:
  deviation = datawizard::contr.deviation,
  sum = contr.sum,
  helmert = contr.helmert,
  poly = contr.poly,
  equalprior = bayestestR::contr.equalprior
)

data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)

models <- lapply(contrs, function(.c) {
  lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
})

all_zero_row <- function(m) {
  apply(m == 0, 1, all)
}

has_zeros_row <- function(m) {
  any(all_zero_row(m))
}

lapply(models, function(i) {
  lapply(i$contrasts, has_zeros_row)
})
#> $treatment
#> $treatment$cyl
#> [1] TRUE
#> 
#> 
#> $SAS
#> $SAS$cyl
#> [1] TRUE
#> 
#> 
#> $deviation
#> $deviation$cyl
#> [1] FALSE
#> 
#> 
#> $sum
#> $sum$cyl
#> [1] FALSE
#> 
#> 
#> $helmert
#> $helmert$cyl
#> [1] FALSE
#> 
#> 
#> $poly
#> $poly$cyl
#> [1] FALSE
#> 
#> 
#> $equalprior
#> $equalprior$cyl
#> [1] FALSE

Created on 2024-04-10 with reprex v2.1.0

@strengejacke
Copy link
Member

library(parameters)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)

m <- lm(mpg ~ cyl + gear, data = mtcars, contrasts = list(cyl = datawizard::contr.deviation))
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
#> -------------------------------------------------------------------
#> (Intercept) |       19.70 | 1.18 | [ 17.28, 22.11] | 16.71 | < .001
#> cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [3]    |        0.00 |      |                 |       |       
#> gear [4]    |        1.32 | 1.93 | [ -2.63,  5.28] |  0.69 | 0.498 
#> gear [5]    |        1.50 | 1.85 | [ -2.31,  5.31] |  0.81 | 0.426
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(mpg ~ cyl + gear, data = mtcars)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
#> -------------------------------------------------------------------
#> (Intercept) |       25.43 | 1.88 | [ 21.57, 29.29] | 13.52 | < .001
#> cyl [4]     |        0.00 |      |                 |       |       
#> cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [3]    |        0.00 |      |                 |       |       
#> gear [4]    |        1.32 | 1.93 | [ -2.63,  5.28] |  0.69 | 0.498 
#> gear [5]    |        1.50 | 1.85 | [ -2.31,  5.31] |  0.81 | 0.426
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(
    cyl = datawizard::contr.deviation,
    gear = contr.sum
  )
)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
#> -------------------------------------------------------------------
#> (Intercept) |       20.64 | 0.67 | [ 19.26, 22.01] | 30.76 | < .001
#> cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [1]    |       -0.94 | 1.09 | [ -3.18,  1.30] | -0.86 | 0.396 
#> gear [2]    |        0.38 | 1.11 | [ -1.90,  2.67] |  0.34 | 0.734
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(
    cyl = contr.SAS,
    gear = contr.sum
  )
)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |         95% CI | t(27) |      p
#> ------------------------------------------------------------------
#> (Intercept) |       15.83 | 1.24 | [13.28, 18.37] | 12.75 | < .001
#> cyl [8]     |        0.00 |      |                |       |       
#> cyl [4]     |       10.54 | 1.96 | [ 6.52, 14.56] |  5.38 | < .001
#> cyl [6]     |        3.89 | 1.88 | [ 0.03,  7.75] |  2.07 | 0.049 
#> gear [1]    |       -0.94 | 1.09 | [-3.18,  1.30] | -0.86 | 0.396 
#> gear [2]    |        0.38 | 1.11 | [-1.90,  2.67] |  0.34 | 0.734
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

m <- lm(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(
    cyl = contr.SAS,
    gear = contr.treatment
  )
)
model_parameters(m, include_reference = TRUE)
#> Parameter   | Coefficient |   SE |         95% CI | t(27) |      p
#> ------------------------------------------------------------------
#> (Intercept) |       14.89 | 0.92 | [13.00, 16.77] | 16.19 | < .001
#> cyl [8]     |        0.00 |      |                |       |       
#> cyl [4]     |       10.54 | 1.96 | [ 6.52, 14.56] |  5.38 | < .001
#> cyl [6]     |        3.89 | 1.88 | [ 0.03,  7.75] |  2.07 | 0.049 
#> gear [3]    |        0.00 |      |                |       |       
#> gear [4]    |        1.32 | 1.93 | [-2.63,  5.28] |  0.69 | 0.498 
#> gear [5]    |        1.50 | 1.85 | [-2.31,  5.31] |  0.81 | 0.426
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

Created on 2024-04-26 with reprex v2.1.0

@strengejacke
Copy link
Member

This currently works for those model objects that have stored their contrasts as model$contrasts. We need to make this more robust for other models, too - or does every package with modelling functions save their contrasts like this?

@strengejacke
Copy link
Member

glmmTMB saves it as function...

library(glmmTMB)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)

m <- glmmTMB(mpg ~ cyl + (1 | gear), data = mtcars, contrasts = list(cyl = contr.sum))
m$modelInfo$contrasts
#> $cyl
#> function (n, contrasts = TRUE, sparse = FALSE) 
#> {
#>     if (length(n) <= 1L) {
#>         if (is.numeric(n) && length(n) == 1L && n > 1L) 
#>             levels <- seq_len(n)
#>         else stop("not enough degrees of freedom to define contrasts")
#>     }
#>     else levels <- n
#>     levels <- as.character(levels)
#>     cont <- .Diag(levels, sparse = sparse)
#>     if (contrasts) {
#>         cont <- cont[, -length(levels), drop = FALSE]
#>         cont[length(levels), ] <- -1
#>         colnames(cont) <- NULL
#>     }
#>     cont
#> }
#> <bytecode: 0x000002727afd2770>
#> <environment: namespace:stats>

Created on 2024-04-26 with reprex v2.1.0

@strengejacke strengejacke added the help us 👀 Help is needed to implement something label Apr 28, 2024
@mattansb
Copy link
Member Author

You can apply those functions (with some n>2) and then run the test:

all_zero_row <- function(m) {
  apply(m == 0, 1, all)
}

has_zeros_row <- function(m) {
  any(all_zero_row(m))
}


library(glmmTMB)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)

m <- glmmTMB(
  mpg ~ cyl + gear,
  data = mtcars,
  contrasts = list(cyl = contr.sum, gear = contr.treatment)
)

glmmtmb_contr_foos <- m$modelInfo$contrasts

glmmtmb_contr_foos |> 
  lapply(\(f) f(3)) |> 
  lapply(has_zeros_row)
#> $cyl
#> [1] FALSE
#> 
#> $gear
#> [1] TRUE
#> 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug 🐛 Something isn't working help us 👀 Help is needed to implement something
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants