A friend asked for some code to look at 2-by-2 factorials. Much of this can be done with DesignLibrary::two_by_two_designer()
, but I thought it was useful to illustrate how it can be done in “base DeclareDesign” too.
Here is the code for a ‘static’ 2-by-2:
# Clear memory and load packages ------------------------------------------
rm(list = ls())
library(DeclareDesign)
# Set parameters ----------------------------------------------------------
N <- 100
prob_A <- 0.5
prob_B <- 0.5
effect_A <- .01
effect_B <- .01
interaction <- .01
design <-
declare_population(
N = N,
# Error is standard normally distributed,
# so effects will be in SDs
u = rnorm(N)) +
declare_potential_outcomes(
# This will generate four potential outcomes, as a function of
# the parameters above and the noise in u. We told it that A and B are
# treatment variables, so it's going to assume that they are binary,
# and then it knows what the other parameters are
formula = Y ~ A * effect_A + B * effect_B + A*B * interaction + u,
assignment_variables = c("A","B")
) +
declare_estimands(
# "Pure" effect of A (B set to 0)
ate_A = mean(Y_A_1_B_0 - Y_A_0_B_0),
# "Pure" effect of B (A set to 0)
ate_B = mean(Y_A_0_B_1 - Y_A_0_B_0),
# Additional effect of A (B) when B (A) is set to 1
interaction = mean((Y_A_1_B_1 - Y_A_0_B_1) - (Y_A_1_B_0 - Y_A_0_B_0))
) +
# Assign variables
declare_assignment(prob = prob_A, assignment_variable = A) +
declare_assignment(prob = prob_B, assignment_variable = B, blocks = A) +
# Reveal Y
declare_reveal(Y_variables = Y, assignment_variables = c(A,B)) +
# Estimate "pure" effect of A and B, assuming no interaction
declare_estimator(Y ~ A + B,
model = lm_robust,
term = c("A", "B"),
estimand = c("ate_A", "ate_B"),
label = "No Interaction") +
# Estimate "pure" effect of A, B, and interaction allowing for interaction
declare_estimator(Y ~ A + B + A:B,
model = lm_robust,
term = c("A", "B", "A:B"),
estimand = c("ate_A", "ate_B", "interaction"),
label = "Interaction")
# View summary
summary(design)
# View one draw of dummy data
draw_data(design)
# View one draw of estimates
draw_estimates(design)
# View one draw of estimands
draw_estimands(design)
# Simulate 5 draws of design
simulate_design(design, sims = 5)
# Simulate and diagnose using 500 draws from design
diagnose_design(design, sims = 500)
# Redesign to have a bigger N (this works with any parameters above)
bigger_design <- redesign(design, N = 200)
# Compare performance across Ns
diagnose_design(design, bigger_design)
# Remove interaction
no_interaction_design <- redesign(design, interaction = 0)
# Compare performance across interactions
diagnose_design(design, no_interaction_design)
And here is the code for making a designer function (just like two_by_two_designer()
!) and generating some different kinds of diagnostic plots.
# Clear memory and load packages ------------------------------------------
rm(list = ls())
library(DeclareDesign)
library(DesignLibrary)
library(tidyverse)
# Create function that returns designs ------------------------------------
make_factorial <- function(
N = 100,
prob_A = 0.5,
prob_B = 0.5,
effect_A = .01,
effect_B = .01,
interaction = .01
){
declare_population(
N = N,
u = rnorm(N)) +
declare_potential_outcomes(
formula = Y ~ A * effect_A + B * effect_B + A*B * interaction + u,
assignment_variables = c("A","B")
) +
declare_estimands(
ate_A = mean(Y_A_1_B_0 - Y_A_0_B_0),
ate_B = mean(Y_A_0_B_1 - Y_A_0_B_0),
interaction = mean((Y_A_1_B_1 - Y_A_0_B_1) - (Y_A_1_B_0 - Y_A_0_B_0))
) +
declare_assignment(prob = prob_A, assignment_variable = A) +
declare_assignment(prob = prob_B, assignment_variable = B, blocks = A) +
declare_reveal(Y_variables = Y, assignment_variables = c(A,B)) +
declare_estimator(Y ~ A + B,
model = lm_robust,
term = c("A", "B"),
estimand = c("ate_A", "ate_B"),
label = "OLS w/o interaction") +
declare_estimator(Y ~ A + B + A:B,
model = lm_robust,
term = c("A", "B", "A:B"),
estimand = c("ate_A", "ate_B", "interaction"),
label = "OLS w interaction")
}
# Use function ------------------------------------------------------------
# Make a design with N of 10, holding other parameters constant at default vals
N10_design <- make_factorial(N = 10)
# Make a design with N of 50 and no effect for A
no_A_design <- make_factorial(N = 50, effect_A = 0, effect_B = .5, interaction = 0)
# Compare
diagnose_design(N10_design, no_A_design)
# Expand designs ----------------------------------------------------------
# List of designs increasing N while holding other params constant
N_designs <- expand_design(designer = make_factorial,
N = list(500, 1000, 1500),
interaction = .5,
effect_A = 0,
effect_B = 0)
N_designs
N_diagnoses <- diagnose_designs(N_designs, sims = 100)
N_diagnoses
# Plot power by N, estimator, and estimand
get_diagnosands(N_diagnoses) %>%
ggplot(aes(x = N,
y = power,
group = estimand_label,
color = estimand_label)) +
geom_point() +
geom_line() +
facet_wrap(~ estimator_label)
# Plot bias by N, estimator, and estimand
get_diagnosands(N_diagnoses) %>%
ggplot(aes(x = N,
y = bias,
group = estimand_label,
color = estimand_label)) +
geom_point() +
geom_line() +
facet_wrap(~ estimator_label)
# Plot bias by distribution of estimates
get_simulations(N_diagnoses) %>%
ggplot(aes(x = estimate,
group = estimand_label,
color = estimand_label,
fill = estimand_label)) +
geom_histogram() +
geom_vline(data = get_diagnosands(N_diagnoses), aes(xintercept = mean_estimate), linetype = "dashed") +
geom_vline(data = get_diagnosands(N_diagnoses), aes(xintercept = mean_estimand)) +
facet_wrap(~ estimator_label + estimand_label)