DeclareDesign Community

Diagnosing two-by-two factorial designs

#1

A friend asked for some code to look at 2-by-2 factorials. Much of this can be done with DesignLibrary::two_by_two_designer(), but I thought it was useful to illustrate how it can be done in “base DeclareDesign” too.

Here is the code for a ‘static’ 2-by-2:

# Clear memory and load packages ------------------------------------------

rm(list = ls())

library(DeclareDesign)

# Set parameters ----------------------------------------------------------

N <- 100
prob_A <- 0.5
prob_B <- 0.5
effect_A <- .01
effect_B <- .01
interaction <- .01

design <- 
  declare_population(
    N = N, 
    # Error is standard normally distributed,
    # so effects will be in SDs
    u = rnorm(N)) +
  declare_potential_outcomes(
    # This will generate four potential outcomes, as a function of 
    # the parameters above and the noise in u. We told it that A and B are
    # treatment variables, so it's going to assume that they are binary, 
    # and then it knows what the other parameters are
    formula = Y ~ A * effect_A + B * effect_B + A*B * interaction + u,
    assignment_variables = c("A","B")
  ) +
  declare_estimands(
    # "Pure" effect of A (B set to 0)
    ate_A = mean(Y_A_1_B_0 - Y_A_0_B_0),
    # "Pure" effect of B (A set to 0)
    ate_B = mean(Y_A_0_B_1 - Y_A_0_B_0),
    # Additional effect of A (B) when B (A) is set to 1
    interaction = mean((Y_A_1_B_1 - Y_A_0_B_1) - (Y_A_1_B_0 - Y_A_0_B_0))
  ) +
  # Assign variables
  declare_assignment(prob = prob_A, assignment_variable = A) +
  declare_assignment(prob = prob_B, assignment_variable = B, blocks = A) +
  # Reveal Y
  declare_reveal(Y_variables = Y, assignment_variables = c(A,B)) +
  # Estimate "pure" effect of A and B, assuming no interaction
  declare_estimator(Y ~ A + B,
                    model = lm_robust,
                    term = c("A", "B"),
                    estimand = c("ate_A", "ate_B"), 
                    label = "No Interaction") +
  # Estimate "pure" effect of A, B, and interaction allowing for interaction
  declare_estimator(Y ~ A + B + A:B,
                    model = lm_robust,
                    term = c("A", "B", "A:B"), 
                    estimand = c("ate_A", "ate_B", "interaction"), 
                    label = "Interaction")

# View summary
summary(design)
# View one draw of dummy data
draw_data(design)
# View one draw of estimates
draw_estimates(design)
# View one draw of estimands
draw_estimands(design)
# Simulate 5 draws of design
simulate_design(design, sims = 5)
# Simulate and diagnose using 500 draws from design
diagnose_design(design, sims = 500)
# Redesign to have a bigger N (this works with any parameters above)
bigger_design <- redesign(design, N = 200)
# Compare performance across Ns
diagnose_design(design, bigger_design)
# Remove interaction
no_interaction_design <- redesign(design, interaction = 0)
# Compare performance across interactions
diagnose_design(design, no_interaction_design)

And here is the code for making a designer function (just like two_by_two_designer()!) and generating some different kinds of diagnostic plots.

# Clear memory and load packages ------------------------------------------

rm(list = ls())

library(DeclareDesign)
library(DesignLibrary)
library(tidyverse)

# Create function that returns designs ------------------------------------

make_factorial <- function(
	N = 100,
	prob_A = 0.5,
	prob_B = 0.5,
	effect_A = .01,
	effect_B = .01,
	interaction = .01
){
	declare_population(
		N = N, 
		u = rnorm(N)) +
		declare_potential_outcomes(
			formula = Y ~ A * effect_A + B * effect_B + A*B * interaction + u,
			assignment_variables = c("A","B")
		) +
		declare_estimands(
			ate_A = mean(Y_A_1_B_0 - Y_A_0_B_0),
			ate_B = mean(Y_A_0_B_1 - Y_A_0_B_0),
			interaction = mean((Y_A_1_B_1 - Y_A_0_B_1) - (Y_A_1_B_0 - Y_A_0_B_0))
		) +
		declare_assignment(prob = prob_A, assignment_variable = A) +
		declare_assignment(prob = prob_B, assignment_variable = B, blocks = A) +
		declare_reveal(Y_variables = Y, assignment_variables = c(A,B)) +
		declare_estimator(Y ~ A + B,
											model = lm_robust,
											term = c("A", "B"),
											estimand = c("ate_A", "ate_B"), 
											label = "OLS w/o interaction") +
		declare_estimator(Y ~ A + B + A:B,
											model = lm_robust,
											term = c("A", "B", "A:B"), 
											estimand = c("ate_A", "ate_B", "interaction"), 
											label = "OLS w interaction")
}

# Use function ------------------------------------------------------------

# Make a design with N of 10, holding other parameters constant at default vals
N10_design <- make_factorial(N = 10)

# Make a design with N of 50 and no effect for A
no_A_design <- make_factorial(N = 50, effect_A = 0, effect_B = .5, interaction = 0)

# Compare
diagnose_design(N10_design, no_A_design)

# Expand designs ----------------------------------------------------------

# List of designs increasing N while holding other params constant
N_designs <- expand_design(designer = make_factorial, 
													 N = list(500, 1000, 1500),
													 interaction = .5,
													 effect_A = 0, 
													 effect_B = 0)
N_designs
N_diagnoses <- diagnose_designs(N_designs, sims = 100)
N_diagnoses

# Plot power by N, estimator, and estimand
get_diagnosands(N_diagnoses) %>% 
	ggplot(aes(x = N, 
						 y = power, 
						 group = estimand_label, 
						 color = estimand_label)) +
	geom_point() +
	geom_line() + 
	facet_wrap(~ estimator_label)

# Plot bias by N, estimator, and estimand
get_diagnosands(N_diagnoses) %>% 
	ggplot(aes(x = N, 
						 y = bias, 
						 group = estimand_label, 
						 color = estimand_label)) +
	geom_point() +
	geom_line() + 
	facet_wrap(~ estimator_label)

# Plot bias by distribution of estimates 
get_simulations(N_diagnoses) %>% 
	ggplot(aes(x = estimate, 
						 group = estimand_label, 
						 color = estimand_label,
						 fill = estimand_label)) +
	geom_histogram() +
	geom_vline(data = get_diagnosands(N_diagnoses), aes(xintercept = mean_estimate), linetype = "dashed") +
	geom_vline(data = get_diagnosands(N_diagnoses), aes(xintercept = mean_estimand)) +
	facet_wrap(~ estimator_label + estimand_label) 

1 Like