# Seed the RNG before any draws so the simulated data and the fit below are
# reproducible.
set.seed(1)
library("tidyverse")

n <- 1e6     # number of simulated rows
beta <- 0.1  # coefficient applied to `slope` in the linear predictor of y

# Simulate the data set. The columns are defined in order and later columns
# refer to earlier ones (p_hat uses slope, y uses p_hat, the last two use x
# and lql), so the order of the arguments matters -- it also fixes the order
# of the random draws: x, then slope, then y.
#   x               uniform draw on [-6, 0]
#   slope           uniform draw on [0, 0.1]
#   lql             constant threshold applied to x (presumably a lower
#                   quantification limit -- confirm); the commented-out
#                   alternative would draw a per-row threshold instead
#   p_hat           success probability; note it depends on slope only,
#                   not on x
#   y               one Bernoulli draw per row with probability p_hat
#   x_observed      x with every value below lql replaced by 0
#   under_threshold "yes" where x is below lql, "no" otherwise
dat <- tibble(
  x = runif(n, -6, 0),
  slope = runif(n, min = 0, max = 0.1),
  lql = -3, # runif(n, min = -5, max = -4),
  p_hat = plogis(beta * slope - mean(slope)),
  y = rbinom(n, size = 1, prob = p_hat),
  x_observed = ifelse(x < lql, 0, x),
  under_threshold = ifelse(x < lql, "yes", "no")
)

# Logistic regression of y on slope and the below-threshold indicator.
# The outer parentheses make the assignment print the fitted model.
(fm <- glm(y ~ slope + under_threshold, family = binomial(), data = dat))
Call: glm(formula = y ~ slope + under_threshold, family = binomial(),
data = dat)
Coefficients:
(Intercept) slope under_thresholdyes
-0.048893 0.087039 -0.002241
Degrees of Freedom: 999999 Total (i.e. Null); 999997 Residual
Null Deviance: 1386000
Residual Deviance: 1386000 AIC: 1386000
summary(fm)
Call:
glm(formula = y ~ slope + under_threshold, family = binomial(),
data = dat)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.048893 0.004473 -10.931 <2e-16 ***
slope 0.087039 0.069321 1.256 0.209
under_thresholdyes -0.002241 0.004001 -0.560 0.575
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1385773 on 999999 degrees of freedom
Residual deviance: 1385771 on 999997 degrees of freedom
AIC: 1385777
Number of Fisher Scoring iterations: 3