diff --git a/NAMESPACE b/NAMESPACE
index 484df87..2d7a0f8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,11 +6,13 @@ S3method(glance,dbreg)
 S3method(plot,dbbinsreg)
 S3method(predict,dbreg)
 S3method(print,dbbinsreg)
+S3method(print,dbivreg)
 S3method(print,dbreg)
 S3method(tidy,dbreg)
 S3method(tinyplot,dbbinsreg)
 S3method(vcov,dbreg)
 export(dbbinsreg)
+export(dbivreg)
 export(dbreg)
 export(glance)
 export(gof)
diff --git a/R/dbivreg.R b/R/dbivreg.R
new file mode 100644
index 0000000..0983bf5
--- /dev/null
+++ b/R/dbivreg.R
@@ -0,0 +1,1940 @@
+#' Instrumental-variables regression on a database backend
+#'
+#' @md
+#' @description
+#' Estimates a linear IV model via exact sufficient statistics on a database
+#' backend. The current implementation supports three exact strategies:
+#'
+#' - `strategy = "moments"` for models without absorbed fixed effects.
+#' - `strategy = "compress"` for repeated no-FE design rows.
+#' - `strategy = "demean"` for one or more absorbed fixed effects.
+#'
+#' Multi-way fixed effects use exact alternating projections when there are
+#' more than two fixed effects, or when two-way fixed effects are unbalanced or
+#' weights are supplied.
+#'
+#' @param fml A fixest-style IV formula. Use `y ~ x1 + x2 | d ~ z` for models
+#'   without absorbed fixed effects, and `y ~ x1 + x2 | fe1 + fe2 + fe3 | d ~ z`
+#'   when fixed effects are present. The IV block must always be the final
+#'   pipe-separated block.
+#' @param conn Database connection, or `NULL` to create an ephemeral DuckDB
+#'   connection.
+#' @param table,data,path Mutually exclusive data source arguments with
+#'   precedence `table > data > path`.
+#' @param weights Optional character string naming an observation-weight column.
+#' @param vcov Character string or one-sided clustering formula. Character
+#'   options are `"iid"` and `"hc1"`.
+#' @param strategy Character string selecting the IV backend. Supported values
+#'   are `"auto"` (default), `"moments"`, `"demean"` (alias `"within"`),
+#'   `"compress"`, and `"mundlak"`. The latter currently errors because it is
+#'   not yet implemented for IV.
+#' @param cluster Optional clustering variable, supplied as a one-sided formula
+#'   or character string.
+#' @param sql_only Logical indicating whether to return only the underlying SQL
+#'   query.
+#' @param data_only Logical indicating whether to return only the aggregated
+#'   sufficient statistics.
+#' @param drop_missings Logical indicating whether incomplete cases should be
+#'   dropped.
+#' @param verbose Logical indicating whether progress messages are printed.
+#' @param ... Additional unused arguments.
+#'
+#' @return A list of class `"dbivreg"` and `"dbreg"`.
+#' @export
+dbivreg = function(
+  fml,
+  conn = NULL,
+  table = NULL,
+  data = NULL,
+  path = NULL,
+  weights = NULL,
+  vcov = c("iid", "hc1"),
+  strategy = c("auto", "moments", "demean", "within", "compress", "mundlak"),
+  cluster = NULL,
+  sql_only = FALSE,
+  data_only = FALSE,
+  drop_missings = TRUE,
+  verbose = getOption("dbreg.verbose", FALSE),
+  ...
+) {
+  verbose = isTRUE(verbose)
+  strategy = match.arg(strategy)
+  if (strategy == "within") {
+    strategy = "demean"
+  }
+  dots = list(...)
+  if (length(dots)) {
+    dot_names = names(dots)
+    if (is.null(dot_names)) {
+      dot_names = rep("", length(dots))
+    }
+    legacy_args = intersect(dot_names, c("endog", "instruments"))
+    if (length(legacy_args) > 0) {
+      stop(
+        "`endog` and `instruments` are no longer supported. ",
+        "Use fixest-style IV formulas like `y ~ x | d ~ z` or `y ~ x | fe1 + fe2 | d ~ z`."
+      )
+    }
+    stop(
+      "Unused arguments: ",
+      paste(ifelse(nzchar(dot_names), paste0("`", dot_names, "`"), "<unnamed>"), collapse = ", ")
+    )
+  }
+
+  vcov_parsed = parse_vcov_args(vcov, cluster, valid_types = c("iid", "hc1"))
+  inputs = process_dbivreg_inputs(
+    fml = fml,
+    conn = conn,
+    table = table,
+    data = data,
+    path = path,
+    weights = weights,
+    vcov = vcov_parsed[["vcov_type"]],
+    cluster = vcov_parsed[["cluster_var"]],
+    strategy = strategy,
+    sql_only = sql_only,
+    data_only = data_only,
+    drop_missings = drop_missings,
+    verbose = verbose
+  )
+
+  cleanup_conn = function() {
+    if (isTRUE(inputs[["own_conn"]]) && dbIsValid(inputs[["conn"]])) {
+      dbDisconnect(inputs[["conn"]], shutdown = TRUE)
+    }
+  }
+  on.exit(cleanup_conn(), add = TRUE)
+
+  cleanup_registered = function() {
+    if (!is.null(inputs[["registered_table"]]) && dbIsValid(inputs[["conn"]])) {
+      try(duckdb_unregister(inputs[["conn"]], inputs[["registered_table"]]), silent = TRUE)
+    }
+  }
+  on.exit(cleanup_registered(), add = TRUE)
+
+  chosen_strategy = choose_dbivreg_strategy(inputs)
+
+  result = switch(
+    chosen_strategy,
+    "moments" = execute_iv_moments_strategy(inputs),
+    "compress" = execute_iv_compress_strategy(inputs),
+    "demean" = execute_iv_demean_strategy(inputs),
+    stop("Unknown IV strategy: ", chosen_strategy)
+  )
+
+  if (inputs[["sql_only"]]) {
+    cat(result)
+    return(invisible(result))
+  }
+  if (inputs[["data_only"]]) {
+    return(result)
+  }
+
+  result[["strategy"]] = chosen_strategy
+  class(result) = c("dbivreg", "dbreg")
+  result
+}
+
+#' Process and validate dbivreg inputs
+#'
+#' Returns an environment so downstream strategy helpers follow the post-#65
+#' dbreg convention of reading shared state with `inputs[["..."]]`.
+#'
+#' @keywords internal
+process_dbivreg_inputs = function(
+  fml,
+  conn,
+  table,
+  data,
+  path,
+  weights,
+  vcov,
+  cluster,
+  strategy,
+  sql_only,
+  data_only,
+  drop_missings,
+  verbose
+) {
+  vcov_type_req = vcov
+  cluster_var = cluster
+
+  parsed_fml = dbivreg_parse_formula(fml)
+  yvar = parsed_fml[["yvar"]]
+  fe = parsed_fml[["fe"]]
+
+  db_setup = setup_db_connection(conn, table, data, path, caller = "dbivreg")
+  conn = db_setup[["conn"]]
+  own_conn = db_setup[["own_conn"]]
+  from_statement = db_setup[["from_statement"]]
+  registered_table = db_setup[["registered_table"]]
+
+  if (!is.null(weights)) {
+    if (!is.character(weights) || length(weights) != 1) {
+      stop("`weights` must be a single character string (column name) or NULL.")
+    }
+    if (!is.null(data)) {
+      if (!weights %in% names(data)) {
+        stop("Weight column '", weights, "' not found in data.")
+      }
+      wv = data[[weights]]
+      if (any(!is.finite(wv) & !is.na(wv))) {
+        stop("Weights must be finite.")
+      }
+      if (any(wv < 0, na.rm = TRUE)) {
+        stop("Weights must be non-negative.")
+      }
+    } else {
+      neg_sql = glue(
+        "SELECT 1 FROM (SELECT * {from_statement}) t WHERE {weights} < 0 LIMIT 1"
+      )
+      has_neg = tryCatch(nrow(dbGetQuery(conn, neg_sql)) > 0, error = function(e) FALSE)
+      if (isTRUE(has_neg)) {
+        stop("Weights must be non-negative.")
+      }
+    }
+  }
+
+  if (isTRUE(drop_missings) || !is.null(weights)) {
+    if (grepl("WHERE|LIMIT|ORDER\\s+BY|GROUP\\s+BY|HAVING", from_statement, ignore.case = TRUE)) {
+      from_statement = glue("FROM (SELECT * {from_statement}) AS subq")
+    }
+    where_clauses = character(0)
+    if (isTRUE(drop_missings)) {
+      miss_vars = unique(c(
+        yvar,
+        parsed_fml[["exog_vars"]],
+        parsed_fml[["endog_vars"]],
+        parsed_fml[["instr_vars"]],
+        fe,
+        cluster_var
+      ))
+      miss_vars = miss_vars[!is.na(miss_vars) & nzchar(miss_vars)]
+      where_clauses = c(where_clauses, paste0(miss_vars, " IS NOT NULL"))
+    }
+    if (!is.null(weights)) {
+      where_clauses = c(where_clauses, paste0(weights, " > 0"))
+    }
+    if (length(where_clauses) > 0) {
+      from_statement = glue("
+      {from_statement}
+      WHERE {paste(where_clauses, collapse = ' AND ')}
+      ")
+    }
+  }
+
+  design_source = sub("^FROM\\s+", "", from_statement, ignore.case = TRUE)
+  exog_design = dbivreg_expand_block(parsed_fml[["exog_rhs"]], conn, design_source, fe)
+  endog_design = dbivreg_expand_block(parsed_fml[["endog_rhs"]], conn, design_source, fe)
+  instr_design = dbivreg_expand_block(parsed_fml[["instr_rhs"]], conn, design_source, fe)
+
+  if (!length(endog_design[["names"]])) {
+    stop("At least one endogenous regressor is required in the IV block.")
+  }
+  if (!length(instr_design[["names"]])) {
+    stop("At least one excluded instrument is required in the IV block.")
+  }
+
+  overlap_x_endog = intersect(exog_design[["names"]], endog_design[["names"]])
+  if (length(overlap_x_endog) > 0) {
+    stop(
+      "Endogenous regressors cannot also appear as included exogenous regressors. ",
+      "Overlapping term(s): ",
+      paste(standardize_coef_names(overlap_x_endog), collapse = ", ")
+    )
+  }
+  overlap_endog_iv = intersect(endog_design[["names"]], instr_design[["names"]])
+  if (length(overlap_endog_iv) > 0) {
+    stop(
+      "Excluded instruments cannot also appear as endogenous regressors. ",
+      "Overlapping term(s): ",
+      paste(standardize_coef_names(overlap_endog_iv), collapse = ", ")
+    )
+  }
+  overlap_exog_iv = intersect(exog_design[["names"]], instr_design[["names"]])
+  if (length(overlap_exog_iv) > 0) {
+    warning(
+      "Included exogenous regressors are already valid instruments. ",
+      "Dropping duplicate entries from the excluded-instrument block: ",
+      paste(standardize_coef_names(overlap_exog_iv), collapse = ", ")
+    )
+    keep_iv = !instr_design[["names"]] %in% overlap_exog_iv
+    instr_design[["sql"]] = instr_design[["sql"]][keep_iv]
+    instr_design[["names"]] = instr_design[["names"]][keep_iv]
+  }
+  if (length(instr_design[["names"]]) < length(endog_design[["names"]])) {
+    stop("Need at least as many excluded instrument columns as endogenous regressor columns.")
+  }
+
+  x_all_sql = c(exog_design[["sql"]], endog_design[["sql"]])
+  x_all_names = c(exog_design[["names"]], endog_design[["names"]])
+  z_all_names = c(exog_design[["names"]], instr_design[["names"]])
+
+  union_map = c(
+    stats::setNames(x_all_sql, x_all_names),
+    stats::setNames(instr_design[["sql"]], instr_design[["names"]])
+  )
+  union_map = union_map[!duplicated(names(union_map))]
+  union_names = names(union_map)
+  union_sql = unname(union_map)
+
+  list2env(list(
+    fml = fml,
+    structural_fml = dbivreg_structural_formula(
+      yvar = yvar,
+      exog_rhs = parsed_fml[["exog_rhs_text"]],
+      endog_rhs = parsed_fml[["endog_rhs_text"]],
+      fe_rhs = parsed_fml[["fe_rhs_text"]],
+      env = parsed_fml[["env"]]
+    ),
+    yvar = yvar,
+    fe = fe,
+    exog_names = exog_design[["names"]],
+    exog_sql = exog_design[["sql"]],
+    endog_names = endog_design[["names"]],
+    endog_sql = endog_design[["sql"]],
+    instr_names = instr_design[["names"]],
+    instr_sql = instr_design[["sql"]],
+    endog_vars = standardize_coef_names(endog_design[["names"]]),
+    instr_vars = standardize_coef_names(instr_design[["names"]]),
+    x_all_names = x_all_names,
+    z_all_names = z_all_names,
+    union_names = union_names,
+    union_sql = union_sql,
+    conn = conn,
+    from_statement = from_statement,
+    weights = weights,
+    vcov_type_req = vcov_type_req,
+    cluster_var = cluster_var,
+    strategy = strategy,
+    sql_only = sql_only,
+    data_only = data_only,
+    verbose = verbose,
+    own_conn = own_conn,
+    registered_table = registered_table
+  ), parent = emptyenv())
+}
+
+#' @keywords internal
+choose_dbivreg_strategy = function(inputs) {
+  strategy = inputs[["strategy"]]
+  fe = inputs[["fe"]]
+  verbose = inputs[["verbose"]]
+
+  if (strategy == "auto") {
+    chosen_strategy = if (length(fe) == 0) "moments" else "demean"
+    if (verbose) {
+      message("[dbivreg] Auto strategy: ", chosen_strategy)
+    }
+  } else {
+    chosen_strategy = strategy
+    if (verbose) {
+      message("[dbivreg] Using strategy: ", chosen_strategy)
+    }
+  }
+
+  if (chosen_strategy == "mundlak") {
+    stop(
+      "dbivreg() does not yet implement strategy = '",
+      chosen_strategy,
+      "'. Supported IV strategies are 'moments', 'compress', and 'demean'."
+    )
+  }
+  if (chosen_strategy == "compress" && length(fe) > 0) {
+    stop(
+      "strategy = 'compress' is only available without fixed effects in dbivreg(). ",
+      "Use strategy = 'demean' or strategy = 'auto' when fixed effects are present."
+    )
+  }
+  if (chosen_strategy == "moments" && length(fe) > 0) {
+    stop(
+      "strategy = 'moments' is only available without fixed effects. ",
+      "Use strategy = 'demean' or strategy = 'auto'."
+    )
+  }
+  if (chosen_strategy == "demean" && length(fe) < 1) {
+    stop(
+      "strategy = 'demean' requires at least one fixed effect. ",
+      "Use strategy = 'moments' or strategy = 'auto' for models without fixed effects."
+    )
+  }
+
+  chosen_strategy
+}
+
+#' @keywords internal
+execute_iv_moments_strategy = function(inputs) {
+  weight_alias = "__dbivreg_weight"
+  base_select = c(sprintf("%s AS %s", inputs[["yvar"]], inputs[["yvar"]]))
+  for (i in seq_along(inputs[["union_names"]])) {
+    if (identical(inputs[["union_sql"]][i], inputs[["union_names"]][i])) {
+      base_select = c(base_select, inputs[["union_names"]][i])
+    } else {
+      base_select = c(base_select, sprintf("%s AS %s", inputs[["union_sql"]][i], inputs[["union_names"]][i]))
+    }
+  }
+  if (!is.null(inputs[["weights"]])) {
+    base_select = c(base_select, sprintf("%s AS %s", inputs[["weights"]], weight_alias))
+  }
+  if (!is.null(inputs[["cluster_var"]]) && !inputs[["cluster_var"]] %in% c(inputs[["yvar"]], inputs[["union_names"]], weight_alias)) {
+    base_select = c(base_select, inputs[["cluster_var"]])
+  }
+
+  weights_expr = if (is.null(inputs[["weights"]])) NULL else sql_weight_expr(weight_alias)
+  moment_terms = dbivreg_union_moment_terms(
+    conn = inputs[["conn"]],
+    union_names = inputs[["union_names"]],
+    y_expr = inputs[["yvar"]],
+    weights_expr = weights_expr,
+    has_intercept = TRUE
+  )
+
+  cte_sql = paste0(
+    "WITH base AS (\n  SELECT ",
+    paste(base_select, collapse = ", "),
+    " ",
+    inputs[["from_statement"]],
+    "\n)"
+  )
+  moments_sql = paste0(
+    cte_sql,
+    "\nSELECT\n  ",
+    paste(moment_terms, collapse = ",\n  "),
+    "\nFROM base"
+  )
+
+  if (inputs[["sql_only"]]) {
+    return(moments_sql)
+  }
+  if (inputs[["verbose"]]) {
+    message(if (!is.null(inputs[["weights"]])) "[dbivreg] Executing weighted IV moments SQL\n" else "[dbivreg] Executing IV moments SQL\n")
+  }
+
+  moments_df = dbGetQuery(inputs[["conn"]], moments_sql)
+  if (inputs[["data_only"]]) {
+    return(moments_df)
+  }
+
+  dbivreg_finalize_fit(
+    inputs = inputs,
+    moment_df = moments_df,
+    query_string = moments_sql,
+    cte_sql = cte_sql,
+    cte_name = "base",
+    x_names_full = c("(Intercept)", inputs[["x_all_names"]]),
+    z_names_full = c("(Intercept)", inputs[["z_all_names"]]),
+    has_intercept = TRUE,
+    weights_expr = weights_expr,
+    x_vars_sql = inputs[["x_all_names"]],
+    z_vars_sql = inputs[["z_all_names"]],
+    yvar_sql = inputs[["yvar"]]
+  )
+}
+
+#' @keywords internal
+execute_iv_compress_strategy = function(inputs) {
+  if (length(inputs[["fe"]]) > 0) {
+    stop(
+      "strategy = 'compress' is only available without fixed effects in dbivreg(). ",
+      "Use strategy = 'demean' or strategy = 'auto' when fixed effects are present."
+    )
+  }
+
+  yvar = inputs[["yvar"]]
+  weight_alias = "__dbivreg_weight"
+  base_select = c(sprintf("%s AS %s", yvar, yvar))
+  for (i in seq_along(inputs[["union_names"]])) {
+    if (identical(inputs[["union_sql"]][i], inputs[["union_names"]][i])) {
+      base_select = c(base_select, inputs[["union_names"]][i])
+    } else {
+      base_select = c(base_select, sprintf("%s AS %s", inputs[["union_sql"]][i], inputs[["union_names"]][i]))
+    }
+  }
+  if (!is.null(inputs[["weights"]])) {
+    base_select = c(base_select, sprintf("%s AS %s", inputs[["weights"]], weight_alias))
+  }
+  if (!is.null(inputs[["cluster_var"]]) && !inputs[["cluster_var"]] %in% c(yvar, inputs[["union_names"]], weight_alias)) {
+    base_select = c(base_select, inputs[["cluster_var"]])
+  }
+
+  weights_expr = if (is.null(inputs[["weights"]])) NULL else sql_weight_expr(weight_alias)
+  compressed_terms = c(
+    sql_count(inputs[["conn"]], "n"),
+    if (is.null(weights_expr)) sql_count(inputs[["conn"]], "sum_w") else glue("SUM({weights_expr}) AS sum_w"),
+    sql_weighted_sum(yvar, weights_expr, "sum_wy"),
+    sql_weighted_sum(glue("({yvar}) * ({yvar})"), weights_expr, "sum_wy_sq")
+  )
+  compressed_moment_terms = dbivreg_compressed_moment_terms(
+    conn = inputs[["conn"]],
+    union_names = inputs[["union_names"]],
+    has_intercept = TRUE
+  )
+  group_cols_sql = paste(inputs[["union_names"]], collapse = ", ")
+
+  cte_sql = paste0(
+    "WITH base AS (\n  SELECT ",
+    paste(base_select, collapse = ", "),
+    " ",
+    inputs[["from_statement"]],
+    "\n)"
+  )
+  compress_sql = paste0(
+    cte_sql,
+    ",\ncompressed AS (\n  SELECT\n    ",
+    group_cols_sql,
+    ",\n    ",
+    paste(compressed_terms, collapse = ",\n    "),
+    "\n  FROM base\n  GROUP BY ",
+    group_cols_sql,
+    "\n),\nmoments AS (\n  SELECT\n    ",
+    paste(compressed_moment_terms, collapse = ",\n    "),
+    "\n  FROM compressed\n)\nSELECT * FROM moments"
+  )
+
+  if (inputs[["sql_only"]]) {
+    return(compress_sql)
+  }
+  if (inputs[["verbose"]]) {
+    message(if (!is.null(inputs[["weights"]])) "[dbivreg] Executing weighted compressed IV SQL\n" else "[dbivreg] Executing compressed IV SQL\n")
+  }
+
+  moments_df = dbGetQuery(inputs[["conn"]], compress_sql)
+  if (inputs[["data_only"]]) {
+    return(moments_df)
+  }
+
+  compression_ratio = moments_df[["n_compressed"]] / max(moments_df[["n_total"]], 1)
+  if (inputs[["verbose"]] && compression_ratio > 0.8) {
+    warning(paste0(
+      sprintf("[dbivreg] compression ineffective (%.1f%% of original rows). ", 100 * compression_ratio),
+      "Consider strategy = 'moments'."
+    ))
+  }
+
+  result = dbivreg_finalize_fit(
+    inputs = inputs,
+    moment_df = moments_df,
+    query_string = compress_sql,
+    cte_sql = cte_sql,
+    cte_name = "base",
+    x_names_full = c("(Intercept)", inputs[["x_all_names"]]),
+    z_names_full = c("(Intercept)", inputs[["z_all_names"]]),
+    has_intercept = TRUE,
+    weights_expr = weights_expr,
+    x_vars_sql = inputs[["x_all_names"]],
+    z_vars_sql = inputs[["z_all_names"]],
+    yvar_sql = yvar
+  )
+  result[["nobs"]] = moments_df[["n_compressed"]]
+  result[["compression_ratio"]] = compression_ratio
+  result
+}
+
+#' @keywords internal
+execute_iv_demean_strategy = function(inputs) {
+  yvar = inputs[["yvar"]]
+  fe = inputs[["fe"]]
+  union_names = inputs[["union_names"]]
+  weights_expr_base = sql_weight_expr(inputs[["weights"]])
+  weights_expr_demeaned = if (is.null(inputs[["weights"]])) NULL else sql_weight_expr("weights")
+
+  all_var_names = c(yvar, union_names)
+  cluster_var = inputs[["cluster_var"]]
+  ap_tables = NULL
+
+  use_ap = FALSE
+  if (length(fe) >= 2) {
+    if (length(fe) > 2) {
+      use_ap = TRUE
+    } else {
+      is_balanced = dbreg_is_balanced_panel(inputs[["conn"]], inputs[["from_statement"]], fe)
+      use_ap = !is.null(inputs[["weights"]]) || !isTRUE(is_balanced)
+    }
+    if (isTRUE(use_ap) && inputs[["verbose"]]) {
+      message("[dbivreg] Using alternating projections for FE demeaning")
+    }
+    if (isTRUE(use_ap) && inputs[["sql_only"]]) {
+      stop("[dbivreg] sql_only is not supported for alternating projections.", call. = FALSE)
+    }
+  }
+
+  if (length(fe) == 1) {
+    fe1 = fe[1]
+    base_select = c(fe1, yvar)
+    for (i in seq_along(union_names)) {
+      base_select = c(base_select, sprintf("%s AS %s", inputs[["union_sql"]][i], union_names[i]))
+    }
+    if (!is.null(inputs[["weights"]]) && !inputs[["weights"]] %in% c(fe1, yvar, union_names)) {
+      base_select = c(base_select, inputs[["weights"]])
+    }
+    if (!is.null(cluster_var) && !cluster_var %in% c(fe1, yvar, union_names, inputs[["weights"]])) {
+      base_select = c(base_select, cluster_var)
+    }
+
+    means_cols = paste(
+      vapply(
+        all_var_names,
+        function(v) sql_weighted_mean(v, weights_expr_base, paste0(v, "_mean")),
+        character(1)
+      ),
+      collapse = ", "
+    )
+    tilde_exprs = paste(
+      sprintf("(b.%s - gm.%s_mean) AS %s_tilde", all_var_names, all_var_names, all_var_names),
+      collapse = ",\n       "
+    )
+    if (!is.null(inputs[["weights"]])) {
+      tilde_exprs = paste(
+        tilde_exprs,
+        sprintf("b.%s AS weights", inputs[["weights"]]),
+        sep = ",\n       "
+      )
+    }
+    if (!is.null(cluster_var) && !cluster_var %in% fe1) {
+      tilde_exprs = paste(
+        tilde_exprs,
+        sprintf("b.%s AS %s", cluster_var, cluster_var),
+        sep = ",\n       "
+      )
+    }
+
+    cte_sql = paste0(
+      "WITH base AS (
+      SELECT ", paste(base_select, collapse = ", "), " ",
+      inputs[["from_statement"]],
+      "
+      ),
+      group_means AS (
+      SELECT ",
+      fe1,
+      ", ",
+      means_cols,
+      " FROM base GROUP BY ",
+      fe1,
+      "
+      ),
+      demeaned AS (
+      SELECT
+          b.",
+      fe1,
+      ",
+          ",
+      tilde_exprs,
+      "
+      FROM base b
+      JOIN group_means gm ON b.",
+      fe1,
+      " = gm.",
+      fe1,
+      "
+      )"
+    )
+  } else {
+    fe1 = fe[1]
+    fe2 = fe[2]
+
+    if (isTRUE(use_ap)) {
+      ap_res = dbreg_alternating_projections(
+        conn = inputs[["conn"]],
+        from_statement = inputs[["from_statement"]],
+        fe = fe,
+        yvar = yvar,
+        xvars_sql = inputs[["union_sql"]],
+        xvar_names = union_names,
+        weights = inputs[["weights"]],
+        cluster_var = cluster_var,
+        verbose = inputs[["verbose"]]
+      )
+      ap_tables = c(ap_res[["table"]], ap_res[["base_table"]])
+      weights_expr_demeaned = "__w"
+      cte_sql = paste0("WITH demeaned AS (SELECT * FROM ", ap_res[["table"]], ")")
+    } else {
+      base_select = c(fe1, fe2, yvar)
+      for (i in seq_along(union_names)) {
+        base_select = c(base_select, sprintf("%s AS %s", inputs[["union_sql"]][i], union_names[i]))
+      }
+      if (!is.null(inputs[["weights"]]) && !inputs[["weights"]] %in% c(fe1, fe2, yvar, union_names)) {
+        base_select = c(base_select, inputs[["weights"]])
+      }
+      if (!is.null(cluster_var) && !cluster_var %in% c(fe1, fe2, yvar, union_names, inputs[["weights"]])) {
+        base_select = c(base_select, cluster_var)
+      }
+
+      unit_means_cols = paste(
+        vapply(
+          all_var_names,
+          function(v) sql_weighted_mean(v, weights_expr_base, paste0(v, "_u")),
+          character(1)
+        ),
+        collapse = ", "
+      )
+      time_means_cols = paste(
+        vapply(
+          all_var_names,
+          function(v) sql_weighted_mean(v, weights_expr_base, paste0(v, "_t")),
+          character(1)
+        ),
+        collapse = ", "
+      )
+      overall_cols = paste(
+        vapply(
+          all_var_names,
+          function(v) sql_weighted_mean(v, weights_expr_base, paste0(v, "_o")),
+          character(1)
+        ),
+        collapse = ", "
+      )
+      tilde_exprs = paste(
+        sprintf(
+          "(b.%s - um.%s_u - tm.%s_t + o.%s_o) AS %s_tilde",
+          all_var_names,
+          all_var_names,
+          all_var_names,
+          all_var_names,
+          all_var_names
+        ),
+        collapse = ",\n       "
+      )
+      if (!is.null(cluster_var) && !cluster_var %in% c(fe1, fe2)) {
+        tilde_exprs = paste(
+          tilde_exprs,
+          sprintf("b.%s AS %s", cluster_var, cluster_var),
+          sep = ",\n       "
+        )
+      }
+      if (!is.null(inputs[["weights"]])) {
+        tilde_exprs = paste(
+          tilde_exprs,
+          sprintf("b.%s AS weights", inputs[["weights"]]),
+          sep = ",\n       "
+        )
+      }
+
+      cte_sql = paste0(
+        "WITH base AS (
+        SELECT ", paste(base_select, collapse = ", "), " ",
+        inputs[["from_statement"]],
+        "
+        ),
+        unit_means AS (
+        SELECT ",
+        fe1,
+        ", ",
+        unit_means_cols,
+        " FROM base GROUP BY ",
+        fe1,
+        "
+        ),
+        time_means AS (
+        SELECT ",
+        fe2,
+        ", ",
+        time_means_cols,
+        " FROM base GROUP BY ",
+        fe2,
+        "
+        ),
+        overall AS (
+        SELECT ",
+        overall_cols,
+        " FROM base
+        ),
+        demeaned AS (
+        SELECT
+            b.",
+        fe1,
+        ",
+            b.",
+        fe2,
+        ",
+            ",
+        tilde_exprs,
+        "
+        FROM base b
+        JOIN unit_means um ON b.",
+        fe1,
+        " = um.",
+        fe1,
+        "
+        JOIN time_means tm ON b.",
+        fe2,
+        " = tm.",
+        fe2,
+        "
+        CROSS JOIN overall o
+        )"
+      )
+    }
+  }
+
+  fe_count_terms = vapply(seq_along(fe), function(k) {
+    sql_count(inputs[["conn"]], sprintf("n_fe%d", k), fe[k], distinct = TRUE)
+  }, character(1))
+  if (length(fe) == 1) {
+    fe_count_terms = c(fe_count_terms, "1 AS n_fe2")
+  }
+  moment_terms = c(
+    sql_count(inputs[["conn"]], "n_total"),
+    fe_count_terms,
+    sql_weighted_sum(
+      glue("CAST({yvar}_tilde AS FLOAT) * CAST({yvar}_tilde AS FLOAT)"),
+      weights_expr_demeaned,
+      "sum_y_sq"
+    )
+  )
+  for (i in seq_along(union_names)) {
+    moment_terms = c(
+      moment_terms,
+      sql_weighted_sum(
+        glue("CAST({union_names[i]}_tilde AS FLOAT) * CAST({yvar}_tilde AS FLOAT)"),
+        weights_expr_demeaned,
+        sprintf("sum_u%d_y", i)
+      ),
+      sql_weighted_sum(
+        glue("CAST({union_names[i]}_tilde AS FLOAT) * CAST({union_names[i]}_tilde AS FLOAT)"),
+        weights_expr_demeaned,
+        sprintf("sum_u%d_%d", i, i)
+      )
+    )
+  }
+  if (length(union_names) > 1) {
+    for (i in seq_along(union_names)) {
+      if (i == 1) {
+        next
+      }
+      for (j in seq_len(i - 1)) {
+        moment_terms = c(
+          moment_terms,
+          sql_weighted_sum(
+            glue("CAST({union_names[j]}_tilde AS FLOAT) * CAST({union_names[i]}_tilde AS FLOAT)"),
+            weights_expr_demeaned,
+            sprintf("sum_u%d_%d", j, i)
+          )
+        )
+      }
+    }
+  }
+
+  demean_sql = paste0(
+    cte_sql,
+    ",
+      moments AS (
+      SELECT
+          ",
+    paste(moment_terms, collapse = ",\n    "),
+    "
+      FROM demeaned
+      )
+      SELECT * FROM moments"
+  )
+
+  if (inherits(inputs[["conn"]], "AthenaConnection")) {
+    demean_sql = gsub("FLOAT", "REAL", demean_sql, fixed = TRUE)
+  }
+  if (inputs[["sql_only"]]) {
+    return(demean_sql)
+  }
+  if (inputs[["verbose"]]) {
+    message(if (!is.null(inputs[["weights"]])) "[dbivreg] Executing weighted demeaned IV SQL\n" else "[dbivreg] Executing demeaned IV SQL\n")
+  }
+
+  ap_cleanup = function() {
+    if (!is.null(ap_tables)) {
+      backend = detect_backend(inputs[["conn"]])[["name"]]
+      for (tbl in ap_tables) {
+        drop_table_if_exists(inputs[["conn"]], tbl, backend)
+      }
+    }
+  }
+  on.exit(ap_cleanup(), add = TRUE)
+
+  demean_df = dbGetQuery(inputs[["conn"]], demean_sql)
+  if (inputs[["data_only"]]) {
+    return(demean_df)
+  }
+
+  dbivreg_finalize_fit(
+    inputs = inputs,
+    moment_df = demean_df,
+    query_string = demean_sql,
+    cte_sql = cte_sql,
+    cte_name = "demeaned",
+    x_names_full = inputs[["x_all_names"]],
+    z_names_full = inputs[["z_all_names"]],
+    has_intercept = FALSE,
+    weights_expr = weights_expr_demeaned,
+    x_vars_sql = paste0(inputs[["x_all_names"]], "_tilde"),
+    z_vars_sql = paste0(inputs[["z_all_names"]], "_tilde"),
+    yvar_sql = paste0(inputs[["yvar"]], "_tilde")
+  )
+}
+
+#' @keywords internal
+dbivreg_finalize_fit = function(
+  inputs,
+  moment_df,
+  query_string,
+  cte_sql,
+  cte_name,
+  x_names_full,
+  z_names_full,
+  has_intercept,
+  weights_expr,
+  x_vars_sql,
+  z_vars_sql,
+  yvar_sql
+) {
+  moms = dbivreg_reconstruct_moments(moment_df, inputs[["union_names"]], has_intercept)
+
+  S_xx = moms$S_uu[x_names_full, x_names_full, drop = FALSE]
+  S_xy = moms$S_uy[x_names_full, , drop = FALSE]
+  S_zz = moms$S_uu[z_names_full, z_names_full, drop = FALSE]
+  S_zx = moms$S_uu[z_names_full, x_names_full, drop = FALSE]
+  S_zy = moms$S_uy[z_names_full, , drop = FALSE]
+
+  if (qr(S_zz, tol = 1e-10)$rank < ncol(S_zz)) {
+    stop("Instrument matrix is rank deficient.")
+  }
+  z_inv = solve_with_fallback(S_zz, diag(ncol(S_zz)))$XtX_inv
+  A = Matrix::t(S_zx) %*% z_inv %*% S_zx
+  if (qr(A, tol = 1e-10)$rank < ncol(A)) {
+    stop("IV system is not identified or regressors are collinear after instrumentation.")
+  }
+  rhs = Matrix::t(S_zx) %*% z_inv %*% S_zy
+  solve_result = solve_with_fallback(A, rhs)
+  betahat = solve_result$betahat
+  A_inv = solve_result$XtX_inv
+  rownames(betahat) = x_names_full
+
+  rss = as.numeric(
+    moms$sum_y_sq -
+      2 * Matrix::t(betahat) %*% S_xy +
+      Matrix::t(betahat) %*% S_xx %*% betahat
+  )
+
+  n_fe = length(inputs[["fe"]])
+  n_fe_levels = if (n_fe == 0) {
+    numeric(0)
+  } else {
+    stats::setNames(
+      vapply(seq_len(n_fe), function(k) {
+        val = moment_df[[sprintf("n_fe%d", k)]]
+        if (is.null(val)) 1 else as.numeric(val)
+      }, numeric(1)),
+      inputs[["fe"]]
+    )
+  }
+  df_fe = if (n_fe == 0) 0 else sum(n_fe_levels) - (n_fe - 1)
+  df_res = max(moment_df$n_total - length(x_names_full) - df_fe, 1)
+  n_params = length(x_names_full) + df_fe
+
+  tss = if (has_intercept) {
+    moms$sum_y_sq - (moms$sum_wy^2 / moms$sum_w)
+  } else {
+    moms$sum_y_sq
+  }
+
+  meat = NULL
+  is_athena = inherits(inputs[["conn"]], "AthenaConnection")
+  if (inputs[["vcov_type_req"]] == "hc1") {
+    meat = compute_iv_meat_sql(
+      conn = inputs[["conn"]],
+      cte_sql = cte_sql,
+      x_vars = x_names_full[!x_names_full %in% "(Intercept)"],
+      z_vars = z_names_full[!z_names_full %in% "(Intercept)"],
+      yvar = inputs[["yvar"]],
+      betahat = betahat,
+      weights_expr = weights_expr,
+      cte_name = cte_name,
+      has_intercept = has_intercept,
+      is_athena = is_athena,
+      x_vars_sql = x_vars_sql,
+      z_vars_sql = z_vars_sql,
+      yvar_sql = yvar_sql
+    )
+  } else if (inputs[["vcov_type_req"]] == "cluster") {
+    meat = compute_iv_meat_cluster_sql(
+      conn = inputs[["conn"]],
+      cte_sql = cte_sql,
+      x_vars = x_names_full[!x_names_full %in% "(Intercept)"],
+      z_vars = z_names_full[!z_names_full %in% "(Intercept)"],
+      yvar = inputs[["yvar"]],
+      betahat = betahat,
+      cluster_var = inputs[["cluster_var"]],
+      weights_expr = weights_expr,
+      cte_name = cte_name,
+      has_intercept = has_intercept,
+      is_athena = is_athena,
+      x_vars_sql = x_vars_sql,
+      z_vars_sql = z_vars_sql,
+      yvar_sql = yvar_sql
+    )
+  }
+
+  vcov_mat = compute_iv_vcov(
+    vcov_type = inputs[["vcov_type_req"]],
+    A_inv = A_inv,
+    ZtZ_inv = z_inv,
+    ZtX = S_zx,
+    rss = rss,
+    df_res = df_res,
+    nobs_orig = moment_df$n_total,
+    n_params = n_params,
+    meat = meat
+  )
+  attr(vcov_mat, "rss") = rss
+  attr(vcov_mat, "tss") = tss
+
+  coeftable = gen_coeftable(betahat, vcov_mat, df_res)
+  iv_diagnostics = compute_dbivreg_diagnostics(
+    inputs = inputs,
+    moms = moms,
+    cte_sql = cte_sql,
+    cte_name = cte_name,
+    has_intercept = has_intercept,
+    weights_expr = weights_expr,
+    n_total = moment_df$n_total,
+    df_fe = df_fe,
+    rss = rss,
+    betahat = betahat,
+    S_zz = S_zz,
+    S_zx = S_zx,
+    S_zy = S_zy,
+    meat = meat
+  )
+
+  out = list(
+    coeftable = coeftable,
+    vcov = vcov_mat,
+    fml = inputs[["structural_fml"]],
+    yvar = inputs[["yvar"]],
+    xvars = standardize_coef_names(x_names_full[x_names_full != "(Intercept)"]),
+    fe = inputs[["fe"]],
+    weights = inputs[["weights"]],
+    query_string = query_string,
+    nobs = 1L,
+    nobs_orig = moment_df$n_total,
+    estimator = "2SLS",
+    endogenous = inputs[["endog_vars"]],
+    instruments = inputs[["instr_vars"]],
+    diagnostics = iv_diagnostics,
+    df_residual = df_res,
+    n_fe_levels = if (n_fe > 0) n_fe_levels else NULL,
+    n_fe1 = if ("n_fe1" %in% names(moment_df)) moment_df$n_fe1 else NULL,
+    n_fe2 = if ("n_fe2" %in% names(moment_df)) moment_df$n_fe2 else NULL
+  )
+  if (n_fe > 2) {
+    for (k in seq.int(3L, n_fe)) {
+      out[[sprintf("n_fe%d", k)]] = unname(n_fe_levels[k])
+    }
+  }
+  out
+}
+
+#' @keywords internal
+dbivreg_reconstruct_moments = function(moment_df, union_names, has_intercept) {
+  if (has_intercept) {
+    union_all = c("(Intercept)", union_names)
+    S_uu = matrix(0, length(union_all), length(union_all), dimnames = list(union_all, union_all))
+    S_uy = matrix(0, length(union_all), 1, dimnames = list(union_all, ""))
+
+    S_uu["(Intercept)", "(Intercept)"] = moment_df$sum_w
+    S_uy["(Intercept)", ] = moment_df$sum_wy
+    for (i in seq_along(union_names)) {
+      S_uu["(Intercept)", union_names[i]] = S_uu[union_names[i], "(Intercept)"] = moment_df[[sprintf("sum_u%d", i)]]
+      S_uu[union_names[i], union_names[i]] = moment_df[[sprintf("sum_u%d_%d", i, i)]]
+      S_uy[union_names[i], ] = moment_df[[sprintf("sum_u%d_y", i)]]
+    }
+    if (length(union_names) > 1) {
+      for (i in seq_along(union_names)) {
+        if (i == 1) {
+          next
+        }
+        for (j in seq_len(i - 1)) {
+          S_uu[union_names[i], union_names[j]] = S_uu[union_names[j], union_names[i]] = moment_df[[sprintf("sum_u%d_%d", j, i)]]
+        }
+      }
+    }
+    list(
+      S_uu = S_uu,
+      S_uy = S_uy,
+      sum_w = moment_df$sum_w,
+      sum_wy = moment_df$sum_wy,
+      sum_y_sq = moment_df$sum_wy_sq
+    )
+  } else {
+    S_uu = matrix(0, length(union_names), length(union_names), dimnames = list(union_names, union_names))
+    S_uy = matrix(0, length(union_names), 1, dimnames = list(union_names, ""))
+
+    for (i in seq_along(union_names)) {
+      S_uu[i, i] = moment_df[[sprintf("sum_u%d_%d", i, i)]]
+      S_uy[i, ] = moment_df[[sprintf("sum_u%d_y", i)]]
+    }
+    if (length(union_names) > 1) {
+      for (i in seq_along(union_names)) {
+        if (i == 1) {
+          next
+        }
+        for (j in seq_len(i - 1)) {
+          S_uu[i, j] = S_uu[j, i] = moment_df[[sprintf("sum_u%d_%d", j, i)]]
+        }
+      }
+    }
+    list(
+      S_uu = S_uu,
+      S_uy = S_uy,
+      sum_w = NA_real_,
+      sum_wy = NA_real_,
+      sum_y_sq = moment_df$sum_y_sq
+    )
+  }
+}
+
+#' @keywords internal
+dbivreg_union_moment_terms = function(conn, union_names, y_expr, weights_expr, has_intercept) {
+  terms = c(
+    sql_count(conn, "n_total"),
+    if (is.null(weights_expr)) sql_count(conn, "sum_w") else glue("SUM({weights_expr}) AS sum_w"),
+    sql_weighted_sum(y_expr, weights_expr, "sum_wy"),
+    sql_weighted_sum(glue("({y_expr}) * ({y_expr})"), weights_expr, "sum_wy_sq")
+  )
+  for (i in seq_along(union_names)) {
+    if (has_intercept) {
+      terms = c(terms, sql_weighted_sum(union_names[i], weights_expr, sprintf("sum_u%d", i)))
+    }
+    terms = c(
+      terms,
+      sql_weighted_sum(glue("({union_names[i]}) * ({y_expr})"), weights_expr, sprintf("sum_u%d_y", i)),
+      sql_weighted_sum(glue("({union_names[i]}) * ({union_names[i]})"), weights_expr, sprintf("sum_u%d_%d", i, i))
+    )
+  }
+  if (length(union_names) > 1) {
+    for (i in seq_along(union_names)) {
+      if (i == 1) {
+        next
+      }
+      for (j in seq_len(i - 1)) {
+        terms = c(
+          terms,
+          sql_weighted_sum(glue("({union_names[j]}) * ({union_names[i]})"), weights_expr, sprintf("sum_u%d_%d", j, i))
+        )
+      }
+    }
+  }
+  terms
+}
+
+#' @keywords internal
+dbivreg_compressed_moment_terms = function(conn, union_names, has_intercept) {
+  terms = c(
+    "SUM(n) AS n_total",
+    sql_count(conn, "n_compressed"),
+    "SUM(sum_w) AS sum_w",
+    "SUM(sum_wy) AS sum_wy",
+    "SUM(sum_wy_sq) AS sum_wy_sq"
+  )
+  for (i in seq_along(union_names)) {
+    if (has_intercept) {
+      terms = c(
+        terms,
+        glue("SUM(sum_w * ({union_names[i]})) AS sum_u{i}")
+      )
+    }
+    terms = c(
+      terms,
+      glue("SUM(sum_wy * ({union_names[i]})) AS sum_u{i}_y"),
+      glue("SUM(sum_w * ({union_names[i]}) * ({union_names[i]})) AS sum_u{i}_{i}")
+    )
+  }
+  if (length(union_names) > 1) {
+    for (i in seq_along(union_names)) {
+      if (i == 1) {
+        next
+      }
+      for (j in seq_len(i - 1)) {
+        terms = c(
+          terms,
+          glue("SUM(sum_w * ({union_names[j]}) * ({union_names[i]})) AS sum_u{j}_{i}")
+        )
+      }
+    }
+  }
+  terms
+}
+
+#' @keywords internal
+dbivreg_structural_formula = function(yvar, exog_rhs, endog_rhs, fe_rhs = NULL, env = parent.frame()) {
+  rhs_parts = c(dbivreg_trim_ws(exog_rhs), dbivreg_trim_ws(endog_rhs))
+  rhs_parts = rhs_parts[nzchar(rhs_parts)]
+  rhs = paste(rhs_parts, collapse = " + ")
+  if (is.null(fe_rhs) || !nzchar(dbivreg_trim_ws(fe_rhs))) {
+    return(Formula(stats::as.formula(paste(yvar, "~", rhs), env = env)))
+  }
+  Formula(stats::as.formula(
+    paste(yvar, "~", rhs, "|", dbivreg_trim_ws(fe_rhs)),
+    env = env
+  ))
+}
+
+#' @keywords internal
+dbivreg_parse_formula = function(fml) {
+  if (inherits(fml, "Formula")) {
+    fml = formula(fml)
+  }
+  if (!inherits(fml, "formula")) {
+    stop("`fml` must be a formula.")
+  }
+
+  env = environment(fml)
+  fml_text = paste(deparse(fml, width.cutoff = 500L), collapse = " ")
+  main_tilde = dbivreg_find_top_level(fml_text, "~")
+  if (is.na(main_tilde)) {
+    stop(
+      "dbivreg() requires a fixest-style IV formula: ",
+      "`y ~ x | d ~ z` or `y ~ x | fe1 + fe2 | d ~ z`."
+    )
+  }
+
+  lhs_text = dbivreg_trim_ws(substr(fml_text, 1L, main_tilde - 1L))
+  rhs_text = dbivreg_trim_ws(substr(fml_text, main_tilde + 1L, nchar(fml_text)))
+  pipe_parts = dbivreg_split_top_level(rhs_text, "|")
+  if (!(length(pipe_parts) %in% c(2L, 3L))) {
+    stop(
+      "dbivreg() requires a fixest-style IV formula with the IV block last: ",
+      "`y ~ x | d ~ z` or `y ~ x | fe1 + fe2 | d ~ z`."
+    )
+  }
+
+  exog_rhs_text = pipe_parts[1]
+  fe_rhs_text = if (length(pipe_parts) == 3L) pipe_parts[2] else NULL
+  iv_rhs_text = pipe_parts[length(pipe_parts)]
+
+  iv_tilde = dbivreg_find_top_level(iv_rhs_text, "~")
+  if (is.na(iv_tilde)) {
+    stop(
+      "The final pipe-separated block must be the IV specification, e.g. `d ~ z`."
+    )
+  }
+
+  endog_rhs_text = dbivreg_trim_ws(substr(iv_rhs_text, 1L, iv_tilde - 1L))
+  instr_rhs_text = dbivreg_trim_ws(substr(iv_rhs_text, iv_tilde + 1L, nchar(iv_rhs_text)))
+  if (!nzchar(endog_rhs_text) || !nzchar(instr_rhs_text)) {
+    stop("The IV block must contain both endogenous regressors and excluded instruments.")
+  }
+
+  y_formula = stats::as.formula(paste(lhs_text, "~ 1"), env = env)
+  yvar = all.vars(y_formula)
+  if (length(yvar) != 1L) {
+    stop("Exactly one outcome variable required.")
+  }
+
+  exog_rhs = stats::as.formula(paste("~", exog_rhs_text), env = env)
+  endog_rhs = stats::as.formula(paste("~", endog_rhs_text), env = env)
+  instr_rhs = stats::as.formula(paste("~", instr_rhs_text), env = env)
+
+  fe = NULL
+  if (!is.null(fe_rhs_text)) {
+    fe_rhs = stats::as.formula(paste("~", fe_rhs_text), env = env)
+    fe_terms = attr(terms(fe_rhs), "term.labels")
+    fe_vars = all.vars(fe_rhs)
+    if (!length(fe_terms)) {
+      stop("Fixed-effects block cannot be empty.")
+    }
+    if (length(fe_terms) != length(fe_vars) || !all(fe_terms %in% fe_vars)) {
+      stop("Fixed effects must be specified as simple additive variable names.")
+    }
+    fe = unique(fe_terms)
+  }
+
+  list(
+    fml = fml,
+    env = env,
+    yvar = yvar,
+    exog_rhs = exog_rhs,
+    exog_rhs_text = exog_rhs_text,
+    exog_vars = all.vars(exog_rhs),
+    endog_rhs = endog_rhs,
+    endog_rhs_text = endog_rhs_text,
+    endog_vars = all.vars(endog_rhs),
+    instr_rhs = instr_rhs,
+    instr_rhs_text = instr_rhs_text,
+    instr_vars = all.vars(instr_rhs),
+    fe = fe,
+    fe_rhs_text = fe_rhs_text
+  )
+}
+
+#' @keywords internal
+#' Expands the right-hand side of the structural formula into SQL expressions for the design matrix, including fixed effects if specified.
+dbivreg_expand_block = function(rhs_formula, conn, table, fe_vars = character()) {
+  if (is.null(fe_vars)) {
+    fe_vars = character()
+  }
+  sql_design = sql_model_matrix(
+    rhs_formula,
+    conn,
+    table,
+    expand = "all",
+    fe_vars = fe_vars
+  )
+  if (anyDuplicated(sql_design$col_names) > 0) {
+    dupes = unique(sql_design$col_names[duplicated(sql_design$col_names)])
+    stop(
+      "IV design matrix contains duplicate column names: ",
+      paste(standardize_coef_names(dupes), collapse = ", ")
+    )
+  }
+  list(
+    sql = sql_design$select_exprs,
+    names = sql_design$col_names,
+    vars = all.vars(rhs_formula),
+    term_labels = attr(terms(rhs_formula), "term.labels")
+  )
+}
+
+#' @keywords internal
+dbivreg_find_top_level = function(x, delim) {
+  chars = strsplit(x, "", fixed = TRUE)[[1]]
+  if (!length(chars)) {
+    return(NA_integer_)
+  }
+
+  depth_paren = 0L
+  depth_bracket = 0L
+  depth_brace = 0L
+  in_single = FALSE
+  in_double = FALSE
+  in_backtick = FALSE
+
+  for (i in seq_along(chars)) {
+    ch = chars[i]
+    prev = if (i > 1L) chars[i - 1L] else ""
+
+    if (in_single) {
+      if (ch == "'" && prev != "\\") in_single = FALSE
+      next
+    }
+    if (in_double) {
+      if (ch == "\"" && prev != "\\") in_double = FALSE
+      next
+    }
+    if (in_backtick) {
+      if (ch == "`") in_backtick = FALSE
+      next
+    }
+
+    if (ch == "'") {
+      in_single = TRUE
+      next
+    }
+    if (ch == "\"") {
+      in_double = TRUE
+      next
+    }
+    if (ch == "`") {
+      in_backtick = TRUE
+      next
+    }
+
+    if (ch == "(") {
+      depth_paren = depth_paren + 1L
+      next
+    }
+    if (ch == ")") {
+      depth_paren = max(depth_paren - 1L, 0L)
+      next
+    }
+    if (ch == "[") {
+      depth_bracket = depth_bracket + 1L
+      next
+    }
+    if (ch == "]") {
+      depth_bracket = max(depth_bracket - 1L, 0L)
+      next
+    }
+    if (ch == "{") {
+      depth_brace = depth_brace + 1L
+      next
+    }
+    if (ch == "}") {
+      depth_brace = max(depth_brace - 1L, 0L)
+      next
+    }
+
+    if (
+      ch == delim &&
+      depth_paren == 0L &&
+      depth_bracket == 0L &&
+      depth_brace == 0L
+    ) {
+      return(i)
+    }
+  }
+
+  NA_integer_
+}
+
+#' @keywords internal
+dbivreg_split_top_level = function(x, delim) {
+  chars = strsplit(x, "", fixed = TRUE)[[1]]
+  if (!length(chars)) {
+    return(character(0))
+  }
+
+  starts = 1L
+  parts = character(0)
+  repeat {
+    subx = paste(chars[starts:length(chars)], collapse = "")
+    pos = dbivreg_find_top_level(subx, delim)
+    if (is.na(pos)) {
+      parts = c(parts, dbivreg_trim_ws(subx))
+      break
+    }
+    end = starts + pos - 2L
+    parts = c(parts, dbivreg_trim_ws(paste(chars[starts:end], collapse = "")))
+    starts = starts + pos
+  }
+  parts
+}
+
+#' @keywords internal
+dbivreg_trim_ws = function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+
+#' @keywords internal
+compute_iv_vcov = function(
+  vcov_type = "iid",
+  A_inv,
+  ZtZ_inv,
+  ZtX,
+  rss,
+  df_res,
+  nobs_orig,
+  n_params,
+  meat = NULL
+) {
+  if (vcov_type == "iid") {
+    sigma2 = rss / df_res
+    vcov_mat = sigma2 * A_inv
+    attr(vcov_mat, "type") = "iid"
+  } else {
+    if (is.null(meat)) {
+      stop("Robust IV variance requires a meat matrix.")
+    }
+    middle = Matrix::t(ZtX) %*% ZtZ_inv %*% meat %*% ZtZ_inv %*% ZtX
+    if (vcov_type == "hc1") {
+      scale_hc1 = nobs_orig / df_res
+      vcov_mat = scale_hc1 * (A_inv %*% middle %*% A_inv)
+      attr(vcov_mat, "type") = "hc1"
+    } else if (vcov_type == "cluster") {
+      n_clusters = attr(meat, "n_clusters")
+      if (is.null(n_clusters)) {
+        stop("Clustered IV meat matrix missing n_clusters attribute.")
+      }
+      scale_cr1 = (n_clusters / (n_clusters - 1)) * ((nobs_orig - 1) / (nobs_orig - n_params))
+      vcov_mat = scale_cr1 * (A_inv %*% middle %*% A_inv)
+      attr(vcov_mat, "type") = "cluster"
+      attr(vcov_mat, "n_clusters") = n_clusters
+    } else {
+      stop("Unsupported vcov type: ", vcov_type)
+    }
+  }
+  dimnames(vcov_mat) = dimnames(A_inv)
+  vcov_mat
+}
+
+#' @keywords internal
+compute_dbivreg_diagnostics = function(
+  inputs,
+  moms,
+  cte_sql,
+  cte_name,
+  has_intercept,
+  weights_expr,
+  n_total,
+  df_fe,
+  rss,
+  betahat,
+  S_zz,
+  S_zx,
+  S_zy,
+  meat
+) {
+  overid = compute_dbivreg_overid(
+    inputs = inputs,
+    betahat = betahat,
+    S_zz = S_zz,
+    S_zx = S_zx,
+    S_zy = S_zy,
+    rss = rss,
+    n_total = n_total,
+    meat = meat
+  )
+  list(
+    first_stage_f = compute_dbivreg_first_stage_f(
+      inputs = inputs,
+      moms = moms,
+      n_total = n_total,
+      df_fe = df_fe,
+      has_intercept = has_intercept
+    ),
+    first_stage_wald = compute_dbivreg_first_stage_wald(
+      inputs = inputs,
+      moms = moms,
+      cte_sql = cte_sql,
+      cte_name = cte_name,
+      has_intercept = has_intercept,
+      weights_expr = weights_expr,
+      n_total = n_total,
+      df_fe = df_fe
+    ),
+    overid = overid,
+    sargan = if (identical(overid$test, "Sargan")) overid else NULL,
+    hansen_j = if (identical(overid$test, "Hansen J")) overid else NULL
+  )
+}
+
+#' @keywords internal
+compute_dbivreg_first_stage_f = function(inputs, moms, n_total, df_fe, has_intercept) {
+  excluded = inputs[["instr_names"]]
+  endogenous = inputs[["endog_names"]]
+  if (!length(excluded) || !length(endogenous)) {
+    return(NULL)
+  }
+
+  unrestricted = c(if (has_intercept) "(Intercept)", inputs[["exog_names"]], excluded)
+  restricted = c(if (has_intercept) "(Intercept)", inputs[["exog_names"]])
+  q = length(excluded)
+  out = vector("list", length(endogenous))
+  names(out) = standardize_coef_names(endogenous)
+
+  for (j in seq_along(endogenous)) {
+    y_name = endogenous[j]
+    S_uu = moms$S_uu[unrestricted, unrestricted, drop = FALSE]
+    S_uy = moms$S_uu[unrestricted, y_name, drop = FALSE]
+    fit_u = solve_with_fallback(S_uu, S_uy)
+    beta_u = fit_u$betahat
+    yy = moms$S_uu[y_name, y_name]
+    rss_u = as.numeric(yy - 2 * Matrix::t(beta_u) %*% S_uy + Matrix::t(beta_u) %*% S_uu %*% beta_u)
+
+    if (length(restricted) == 0) {
+      rss_r = yy
+    } else {
+      S_rr = moms$S_uu[restricted, restricted, drop = FALSE]
+      S_ry = moms$S_uu[restricted, y_name, drop = FALSE]
+      fit_r = solve_with_fallback(S_rr, S_ry)
+      beta_r = fit_r$betahat
+      rss_r = as.numeric(yy - 2 * Matrix::t(beta_r) %*% S_ry + Matrix::t(beta_r) %*% S_rr %*% beta_r)
+    }
+
+    df2 = max(n_total - length(unrestricted) - df_fe, 1)
+    stat = ((rss_r - rss_u) / q) / (rss_u / df2)
+    stat = max(as.numeric(stat), 0)
+    out[[j]] = list(
+      stat = stat,
+      p = stats::pf(stat, q, df2, lower.tail = FALSE),
+      df1 = q,
+      df2 = df2
+    )
+  }
+
+  out
+}
+
+#' @keywords internal
+compute_dbivreg_first_stage_wald = function(
+  inputs,
+  moms,
+  cte_sql,
+  cte_name,
+  has_intercept,
+  weights_expr,
+  n_total,
+  df_fe
+) {
+  excluded = inputs[["instr_names"]]
+  endogenous = inputs[["endog_names"]]
+  if (!length(excluded) || !length(endogenous)) {
+    return(NULL)
+  }
+
+  unrestricted = c(if (has_intercept) "(Intercept)", inputs[["exog_names"]], excluded)
+  regressors = c(inputs[["exog_names"]], excluded)
+  is_athena = inherits(inputs[["conn"]], "AthenaConnection")
+  q = length(excluded)
+  vcov_label = format_vcov_label(inputs[["vcov_type_req"]], NULL)
+  out = vector("list", length(endogenous))
+  names(out) = standardize_coef_names(endogenous)
+
+  for (j in seq_along(endogenous)) {
+    y_name = endogenous[j]
+    S_uu = moms$S_uu[unrestricted, unrestricted, drop = FALSE]
+    S_uy = moms$S_uu[unrestricted, y_name, drop = FALSE]
+    fit_u = solve_with_fallback(S_uu, S_uy)
+    beta_u = fit_u$betahat
+    XtX_inv = fit_u$XtX_inv
+    rownames(beta_u) = unrestricted
+
+    yy = moms$S_uu[y_name, y_name]
+    rss_u = as.numeric(yy - 2 * Matrix::t(beta_u) %*% S_uy + Matrix::t(beta_u) %*% S_uu %*% beta_u)
+    df2 = max(n_total - length(unrestricted) - df_fe, 1)
+
+    if (inputs[["vcov_type_req"]] == "iid") {
+      vcov_u = (rss_u / df2) * XtX_inv
+      attr(vcov_u, "type") = "iid"
+    } else if (inputs[["vcov_type_req"]] == "hc1") {
+      meat = compute_meat_sql(
+        conn = inputs[["conn"]],
+        cte_sql = cte_sql,
+        vars = regressors,
+        yvar = y_name,
+        betahat = beta_u,
+        is_athena = is_athena,
+        var_suffix = if (has_intercept) "" else "_tilde",
+        cte_name = cte_name,
+        has_intercept = has_intercept,
+        vars_sql = if (has_intercept) regressors else paste0(regressors, "_tilde"),
+        weights_expr = weights_expr
+      )
+      vcov_u = compute_vcov(
+        vcov_type = "hc1",
+        strategy = if (has_intercept) "moments" else "demean",
+        XtX_inv = XtX_inv,
+        rss = rss_u,
+        df_res = df2,
+        nobs_orig = n_total,
+        n_params = length(unrestricted) + df_fe,
+        meat = meat
+      )
+      vcov_label = format_vcov_label("hc1", NULL)
+    } else if (inputs[["vcov_type_req"]] == "cluster") {
+      meat = compute_meat_cluster_sql(
+        conn = inputs[["conn"]],
+        cte_sql = cte_sql,
+        vars = regressors,
+        yvar = y_name,
+        betahat = beta_u,
+        cluster_var = inputs[["cluster_var"]],
+        is_athena = is_athena,
+        var_suffix = if (has_intercept) "" else "_tilde",
+        cte_name = cte_name,
+        has_intercept = has_intercept,
+        vars_sql = if (has_intercept) regressors else paste0(regressors, "_tilde"),
+        weights_expr = weights_expr
+      )
+      vcov_u = compute_vcov(
+        vcov_type = "cluster",
+        strategy = if (has_intercept) "moments" else "demean",
+        XtX_inv = XtX_inv,
+        rss = rss_u,
+        df_res = df2,
+        nobs_orig = n_total,
+        n_params = length(unrestricted) + df_fe,
+        meat = meat
+      )
+      n_clusters = attr(vcov_u, "n_clusters")
+      vcov_u = vcov_u * ((n_total - 1) / n_total)
+      attr(vcov_u, "type") = "cluster"
+      attr(vcov_u, "n_clusters") = n_clusters
+      vcov_label = format_vcov_label("cluster", attr(vcov_u, "n_clusters"))
+    } else {
+      stop("Unsupported vcov type for first-stage diagnostics: ", inputs[["vcov_type_req"]])
+    }
+
+    beta_q = beta_u[excluded, , drop = FALSE]
+    vcov_q = vcov_u[excluded, excluded, drop = FALSE]
+    inv_vcov_q = solve_with_fallback(vcov_q, beta_q)$betahat
+    stat = as.numeric(Matrix::t(beta_q) %*% inv_vcov_q) / q
+    stat = max(stat, 0)
+    out[[j]] = list(
+      stat = stat,
+      p = stats::pf(stat, q, df2, lower.tail = FALSE),
+      df1 = q,
+      df2 = df2,
+      vcov = vcov_label
+    )
+  }
+
+  out
+}
+
+#' @keywords internal
+compute_dbivreg_overid = function(inputs, betahat, S_zz, S_zx, S_zy, rss, n_total, meat = NULL) {
+  df = length(inputs[["instr_names"]]) - length(inputs[["endog_names"]])
+  if (df <= 0) {
+    test_name = if (identical(inputs[["vcov_type_req"]], "iid")) "Sargan" else "Hansen J"
+    return(list(stat = NA_real_, p = NA_real_, df = max(df, 0L), test = test_name))
+  }
+
+  moments = S_zy - S_zx %*% betahat
+  if (identical(inputs[["vcov_type_req"]], "iid")) {
+    z_inv = solve_with_fallback(S_zz, moments)$betahat
+    stat = as.numeric(Matrix::t(moments) %*% z_inv) / (rss / n_total)
+    test_name = "Sargan"
+  } else {
+    if (is.null(meat)) {
+      return(list(stat = NA_real_, p = NA_real_, df = df, test = "Hansen J"))
+    }
+    omega_inv_m = solve_with_fallback(meat, moments)$betahat
+    stat = as.numeric(Matrix::t(moments) %*% omega_inv_m)
+    test_name = "Hansen J"
+  }
+  stat = max(stat, 0)
+
+  list(
+    stat = stat,
+    p = stats::pchisq(stat, df = df, lower.tail = FALSE),
+    df = df,
+    test = test_name
+  )
+}
+
+#' @keywords internal
+compute_iv_meat_sql = function(
+  conn,
+  cte_sql,
+  x_vars,
+  z_vars,
+  yvar,
+  betahat,
+  weights_expr = NULL,
+  cte_name = "base",
+  has_intercept = TRUE,
+  is_athena = FALSE,
+  x_vars_sql = x_vars,
+  z_vars_sql = z_vars,
+  yvar_sql = yvar
+) {
+  beta_vals = as.numeric(betahat[c(if (has_intercept) "(Intercept)" else NULL, x_vars), 1])
+  if (has_intercept) {
+    intercept_val = beta_vals[1]
+    beta_vals = beta_vals[-1]
+  }
+
+  weight_sql = if (is.null(weights_expr)) NULL else sprintf("CAST((%s) * (%s) AS FLOAT)", weights_expr, weights_expr)
+  build_sum = function(parts, alias) {
+    parts = parts[!vapply(parts, is.null, logical(1))]
+    sprintf("SUM(%s) AS %s", paste(parts, collapse = " * "), alias)
+  }
+
+  beta_terms = if (length(x_vars_sql)) {
+    paste(sprintf("%.15g * %s", beta_vals, x_vars_sql), collapse = " + ")
+  } else {
+    "0"
+  }
+  if (has_intercept) {
+    resid_expr = sprintf("(%s - %.15g - (%s))", yvar_sql, intercept_val, beta_terms)
+  } else {
+    resid_expr = sprintf("(%s - (%s))", yvar_sql, beta_terms)
+  }
+
+  meat_terms = character(0)
+  if (has_intercept) {
+    meat_terms = c(meat_terms, build_sum(c(
+      sprintf("CAST(%s AS FLOAT)", resid_expr),
+      sprintf("CAST(%s AS FLOAT)", resid_expr),
+      weight_sql
+    ), "meat_0_0"))
+    for (j in seq_along(z_vars_sql)) {
+      meat_terms = c(meat_terms, build_sum(c(
+        sprintf("CAST(%s AS FLOAT)", resid_expr),
+        sprintf("CAST(%s AS FLOAT)", resid_expr),
+        sprintf("CAST(%s AS FLOAT)", z_vars_sql[j]),
+        weight_sql
+      ), sprintf("meat_0_%d", j)))
+    }
+  }
+  for (i in seq_along(z_vars_sql)) {
+    for (j in i:length(z_vars_sql)) {
+      meat_terms = c(meat_terms, build_sum(c(
+        sprintf("CAST(%s AS FLOAT)", resid_expr),
+        sprintf("CAST(%s AS FLOAT)", resid_expr),
+        sprintf("CAST(%s AS FLOAT)", z_vars_sql[i]),
+        sprintf("CAST(%s AS FLOAT)", z_vars_sql[j]),
+        weight_sql
+      ), sprintf("meat_%d_%d", i, j)))
+    }
+  }
+
+  meat_sql = paste0(
+    cte_sql,
+    ",\nmeat AS (SELECT ",
+    paste(meat_terms, collapse = ", "),
+    " FROM ",
+    cte_name,
+    ")\nSELECT * FROM meat"
+  )
+  if (is_athena) {
+    meat_sql = gsub("FLOAT", "REAL", meat_sql, fixed = TRUE)
+  }
+
+  meat_df = dbGetQuery(conn, meat_sql)
+  vars_all = if (has_intercept) c("(Intercept)", z_vars) else z_vars
+  meat_mat = matrix(0, length(vars_all), length(vars_all), dimnames = list(vars_all, vars_all))
+
+  if (has_intercept) {
+    meat_mat[1, 1] = meat_df$meat_0_0
+    for (j in seq_along(z_vars)) {
+      val = meat_df[[sprintf("meat_0_%d", j)]]
+      meat_mat[1, j + 1] = meat_mat[j + 1, 1] = val
+    }
+  }
+  for (i in seq_along(z_vars)) {
+    for (j in i:length(z_vars)) {
+      val = meat_df[[sprintf("meat_%d_%d", i, j)]]
+      idx_i = if (has_intercept) i + 1 else i
+      idx_j = if (has_intercept) j + 1 else j
+      meat_mat[idx_i, idx_j] = meat_mat[idx_j, idx_i] = val
+    }
+  }
+  meat_mat
+}
+
+#' @keywords internal
+compute_iv_meat_cluster_sql = function(
+  conn,
+  cte_sql,
+  x_vars,
+  z_vars,
+  yvar,
+  betahat,
+  cluster_var,
+  weights_expr = NULL,
+  cte_name = "base",
+  has_intercept = TRUE,
+  is_athena = FALSE,
+  x_vars_sql = x_vars,
+  z_vars_sql = z_vars,
+  yvar_sql = yvar
+) {
+  beta_vals = as.numeric(betahat[c(if (has_intercept) "(Intercept)" else NULL, x_vars), 1])
+  if (has_intercept) {
+    intercept_val = beta_vals[1]
+    beta_vals = beta_vals[-1]
+  }
+
+  weight_sql = if (is.null(weights_expr)) NULL else sprintf("CAST(%s AS FLOAT)", weights_expr)
+  build_sum = function(parts, alias) {
+    parts = parts[!vapply(parts, is.null, logical(1))]
+    sprintf("SUM(%s) AS %s", paste(parts, collapse = " * "), alias)
+  }
+
+  beta_terms = if (length(x_vars_sql)) {
+    paste(sprintf("%.15g * %s", beta_vals, x_vars_sql), collapse = " + ")
+  } else {
+    "0"
+  }
+  if (has_intercept) {
+    resid_expr = sprintf("(%s - %.15g - (%s))", yvar_sql, intercept_val, beta_terms)
+  } else {
+    resid_expr = sprintf("(%s - (%s))", yvar_sql, beta_terms)
+  }
+
+  score_terms = character(0)
+  if (has_intercept) {
+    score_terms = c(score_terms, build_sum(c(
+      sprintf("CAST(%s AS FLOAT)", resid_expr),
+      weight_sql
+    ), "score_0"))
+  }
+  for (j in seq_along(z_vars_sql)) {
+    score_terms = c(score_terms, build_sum(c(
+      sprintf("CAST(%s AS FLOAT)", resid_expr),
+      sprintf("CAST(%s AS FLOAT)", z_vars_sql[j]),
+      weight_sql
+    ), sprintf("score_%d", j)))
+  }
+
+  cluster_sql = paste0(
+    cte_sql,
+    ",\ncluster_scores AS (\n  SELECT ",
+    cluster_var,
+    ", ",
+    paste(score_terms, collapse = ", "),
+    "\n  FROM ",
+    cte_name,
+    "\n  GROUP BY ",
+    cluster_var,
+    "\n)\nSELECT * FROM cluster_scores"
+  )
+  if (is_athena) {
+    cluster_sql = gsub("FLOAT", "REAL", cluster_sql, fixed = TRUE)
+  }
+
+  cluster_df = dbGetQuery(conn, cluster_sql)
+  vars_all = if (has_intercept) c("(Intercept)", z_vars) else z_vars
+  meat_mat = matrix(0, length(vars_all), length(vars_all), dimnames = list(vars_all, vars_all))
+  score_cols = if (has_intercept) c("score_0", paste0("score_", seq_along(z_vars))) else paste0("score_", seq_along(z_vars))
+
+  for (i in seq_len(nrow(cluster_df))) {
+    s_g = as.numeric(cluster_df[i, score_cols])
+    meat_mat = meat_mat + tcrossprod(s_g)
+  }
+  attr(meat_mat, "n_clusters") = nrow(cluster_df)
+  meat_mat
+}
+
+#' Print method for dbivreg objects
+#'
+#' @param x A `dbivreg` object.
+#' @param ... Additional unused arguments.
+#' @export
+print.dbivreg = function(x, ...) {
+  ct = x[["coeftable"]]
+  colnames(ct) = c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
+
+  se_type = attr(x$vcov, "type")
+  n_clusters = attr(x$vcov, "n_clusters")
+  se_type = format_vcov_label(se_type, n_clusters)
+
+  if (x$strategy == "moments") {
+    cat("Moments-based 2SLS estimation, Dep. Var.:", x$yvar, "\n")
+    cat("Observations.:", prettyNum(x$nobs_orig, big.mark = ","), "\n")
+  } else if (x$strategy == "demean") {
+    mstring = if (length(x$fe) == 1) {
+      "Demeaned"
+    } else if (length(x$fe) == 2) {
+      "Double Demeaned"
+    } else {
+      "Multi-way Demeaned"
+    }
+    cat(mstring, "2SLS estimation, Dep. Var.:", x$yvar, "\n")
+    cat("Observations.:", prettyNum(x$nobs_orig, big.mark = ","), "\n")
+  }
+  cat("Endogenous:", paste(x$endogenous, collapse = ", "), "\n")
+  cat("Excluded instruments:", paste(x$instruments, collapse = ", "), "\n")
+  cat("Standard Errors:", se_type, "\n")
+  if (!is.null(x$diagnostics$first_stage_f)) {
+    fst_lines = vapply(
+      names(x$diagnostics$first_stage_f),
+      function(nm) {
+        diag = x$diagnostics$first_stage_f[[nm]]
+        sprintf("%s = %.3f (p=%.3g)", nm, diag$stat, diag$p)
+      },
+      character(1)
+    )
+    cat("First-stage F:", paste(fst_lines, collapse = ", "), "\n")
+  }
+  if (!identical(se_type, "IID") && !is.null(x$diagnostics$first_stage_wald)) {
+    wald_lines = vapply(
+      names(x$diagnostics$first_stage_wald),
+      function(nm) {
+        diag = x$diagnostics$first_stage_wald[[nm]]
+        sprintf("%s = %.3f (p=%.3g)", nm, diag$stat, diag$p)
+      },
+      character(1)
+    )
+    cat("First-stage Wald:", paste(wald_lines, collapse = ", "), "\n")
+  }
+  if (!is.null(x$diagnostics$overid) && is.finite(x$diagnostics$overid$stat)) {
+    cat(
+      paste0("Over-ID (", x$diagnostics$overid$test, "):"),
+      sprintf("%.3f (p=%.3g, df=%d)", x$diagnostics$overid$stat, x$diagnostics$overid$p, x$diagnostics$overid$df),
+      "\n"
+    )
+  }
+
+  print_coeftable(ct, gof_vals = gof(x), has_fes = !is.null(x$fe))
+  invisible(ct)
+}
diff --git a/R/print.R b/R/print.R
index ea6f358..1619b6e 100644
--- a/R/print.R
+++ b/R/print.R
@@ -36,16 +36,7 @@ print.dbreg = function(x, fe = FALSE, ...) {
   }
   se_type = attr(x[["vcov"]], "type")
   n_clusters = attr(x[["vcov"]], "n_clusters")
-  se_type = switch(
-    se_type,
-    "iid" = "IID",
-    "hc1" = "Heteroskedasticity-robust",
-    "cluster" = if (!is.null(n_clusters)) {
-      sprintf("Clustered (%d clusters)", n_clusters)
-    } else {
-      "Clustered"
-    }
-  )
+  se_type = format_vcov_label(se_type, n_clusters)
   if (x[["strategy"]] == "compress") {
     cat("Compressed OLS estimation, Dep. Var.:", x[["yvar"]], "\n")
     cat(
@@ -236,4 +227,3 @@ print.dbbinsreg = function(x, ...) {
   
   invisible(x)
 }
-
diff --git a/R/utils.R b/R/utils.R
index d7b9137..8546711 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -20,6 +20,18 @@ env2env = function(source, target, keys = NULL) {
 #' @keywords internal
 standardize_coef_names = function(x) gsub("_x_", ":", x, fixed = TRUE)
 
+#' Format variance-covariance type labels for printing
+#' @keywords internal
+format_vcov_label = function(vcov_type, n_clusters = NULL) {
+  switch(
+    vcov_type,
+    "iid" = "IID",
+    "hc1" = "Heteroskedasticity-robust",
+    "cluster" = if (!is.null(n_clusters)) sprintf("Clustered (%d clusters)", n_clusters) else "Clustered",
+    vcov_type
+  )
+}
+
 #' Generate coefficient table from estimates and vcov matrix
 #' @keywords internal
 gen_coeftable = function(betahat, vcov_mat, df_residual) {
diff --git a/inst/tinytest/test_dbivreg.R b/inst/tinytest/test_dbivreg.R
new file mode 100644
index 0000000..8faf0c7
--- /dev/null
+++ b/inst/tinytest/test_dbivreg.R
@@ -0,0 +1,534 @@
+library(dbreg)
+library(fixest)
+
+rename_fixest_iv = function(x) {
+  names(x) = sub("^fit_", "", names(x))
+  x
+}
+
+extract_fixest_ivstat = function(stats, type, term) {
+  key = paste0(type, "::", term)
+  if (!is.null(stats[[key]])) {
+    return(stats[[key]])
+  }
+  if (all(c("stat", "p", "df1", "df2") %in% names(stats))) {
+    return(stats)
+  }
+  NULL
+}
+
+expect_iv_match = function(db_fit, fx_fit, tol_coef = 1e-6, tol_se = 1e-6, label = "IV") {
+  fx_coef = rename_fixest_iv(coef(fx_fit))
+  expect_true(
+    max(abs(db_fit$coeftable[names(fx_coef), "estimate"] - fx_coef)) < tol_coef,
+    info = paste(label, "coefficients match fixest")
+  )
+
+  fx_se = rename_fixest_iv(se(fx_fit))
+  expect_true(
+    max(abs(db_fit$coeftable[names(fx_se), "std.error"] - fx_se)) < tol_se,
+    info = paste(label, "standard errors match fixest")
+  )
+}
+
+manual_hansen_j = function(Z, u, cluster = NULL, weights = NULL) {
+  if (is.null(weights)) {
+    weights = rep(1, length(u))
+  }
+  scores = Z * as.numeric(u * weights)
+  g = colSums(scores)
+
+  if (is.null(cluster)) {
+    omega = crossprod(scores)
+  } else {
+    omega = matrix(0, ncol(scores), ncol(scores))
+    for (cl in unique(cluster)) {
+      s_g = colSums(scores[cluster == cl, , drop = FALSE])
+      omega = omega + tcrossprod(s_g)
+    }
+  }
+
+  as.numeric(t(g) %*% qr.solve(omega, g))
+}
+
+set.seed(2026)
+
+tol = 1e-6
+tol_cluster = 1e-5
+
+n = 600L
+cluster = factor(sample(1:60, n, replace = TRUE))
+z = rnorm(n)
+x = rnorm(n)
+u = rnorm(n)
+d = 0.9 * z + 0.5 * x + rnorm(n)
+y = 1 + 2 * d + 0.3 * x + u
+w = runif(n, min = 0.4, max = 2.2)
+
+dat = data.frame(y = y, d = d, x = x, z = z, w = w, cluster = cluster)
+
+db_iid = dbivreg(
+  y ~ x | d ~ z,
+  data = dat,
+  weights = "w",
+  vcov = "iid",
+  strategy = "moments"
+)
+fx_iid = feols(y ~ x | d ~ z, data = dat, weights = ~w, vcov = "iid")
+expect_iv_match(db_iid, fx_iid, tol_coef = tol, tol_se = tol, label = "No-FE IID")
+fs_iid = fitstat(fx_iid, ~ ivf1 + sargan, simplify = TRUE)
+expect_equal(
+  db_iid$diagnostics$first_stage_f$d$stat,
+  extract_fixest_ivstat(fs_iid, "ivf1", "d")$stat,
+  tolerance = tol,
+  info = "No-FE IID first-stage F matches fixest"
+)
+expect_true(
+  is.na(db_iid$diagnostics$overid$stat) && is.na(fs_iid$sargan),
+  info = "Exactly identified models report no Sargan statistic"
+)
+
+db_hc1 = dbivreg(
+  y ~ x | d ~ z,
+  data = dat,
+  weights = "w",
+  vcov = "hc1",
+  strategy = "moments"
+)
+fx_hc1 = feols(y ~ x | d ~ z, data = dat, weights = ~w, vcov = "hc1")
+expect_iv_match(db_hc1, fx_hc1, tol_coef = tol, tol_se = tol, label = "No-FE HC1")
+
+db_cl = dbivreg(
+  y ~ x | d ~ z,
+  data = dat,
+  weights = "w",
+  vcov = ~cluster,
+  strategy = "moments"
+)
+fx_cl = feols(y ~ x | d ~ z, data = dat, weights = ~w, vcov = ~cluster)
+expect_iv_match(db_cl, fx_cl, tol_coef = tol, tol_se = tol_cluster, label = "No-FE cluster")
+fs_cl = fitstat(fx_cl, ~ ivf1 + ivwald1, simplify = TRUE)
+expect_equal(
+  db_cl$diagnostics$first_stage_wald$d$stat,
+  extract_fixest_ivstat(fs_cl, "ivwald1", "d")$stat,
+  tolerance = tol_cluster,
+  info = "No-FE cluster first-stage Wald matches fixest"
+)
+
+expect_true(
+  dbivreg(
+    y ~ x | d ~ z,
+    data = dat,
+    weights = "w",
+    vcov = "iid",
+    strategy = "auto"
+  )$strategy == "moments",
+  info = "Auto strategy selects moments when no fixed effects are present"
+)
+
+set.seed(2034)
+
+n_comp_cells = 80L
+comp_reps = sample(2:6, n_comp_cells, replace = TRUE)
+dat_comp_cell = data.frame(
+  cell = seq_len(n_comp_cells),
+  cluster = factor(sample(1:20, n_comp_cells, replace = TRUE))
+)
+dat_comp_cell$x = round(rnorm(n_comp_cells), 2)
+dat_comp_cell$z = round(rnorm(n_comp_cells), 2)
+v_comp_cell = rnorm(n_comp_cells)
+dat_comp_cell$d = round(0.8 * dat_comp_cell$z + 0.4 * dat_comp_cell$x + v_comp_cell, 2)
+
+dat_comp = dat_comp_cell[rep(seq_len(n_comp_cells), comp_reps), ]
+rownames(dat_comp) = NULL
+dat_comp$w = runif(nrow(dat_comp), min = 0.5, max = 2.0)
+dat_comp$y = 1.0 + 2.0 * dat_comp$d + 0.3 * dat_comp$x +
+  0.6 * v_comp_cell[dat_comp$cell] + rnorm(nrow(dat_comp))
+
+db_comp_hc1 = dbivreg(
+  y ~ x | d ~ z,
+  data = dat_comp,
+  weights = "w",
+  vcov = "hc1",
+  strategy = "compress"
+)
+fx_comp_hc1 = feols(y ~ x | d ~ z, data = dat_comp, weights = ~w, vcov = "hc1")
+expect_iv_match(db_comp_hc1, fx_comp_hc1, tol_coef = tol, tol_se = tol, label = "Compressed IV HC1")
+expect_true(
+  db_comp_hc1$nobs < db_comp_hc1$nobs_orig && db_comp_hc1$compression_ratio < 1,
+  info = "Compressed IV records fewer design rows than original observations"
+)
+fs_comp_hc1 = fitstat(fx_comp_hc1, ~ ivwald1, simplify = TRUE)
+expect_equal(
+  db_comp_hc1$diagnostics$first_stage_wald$d$stat,
+  extract_fixest_ivstat(fs_comp_hc1, "ivwald1", "d")$stat,
+  tolerance = tol,
+  info = "Compressed IV first-stage Wald matches fixest"
+)
+
+db_comp_cl = dbivreg(
+  y ~ x | d ~ z,
+  data = dat_comp,
+  weights = "w",
+  vcov = ~cluster,
+  strategy = "compress"
+)
+fx_comp_cl = feols(y ~ x | d ~ z, data = dat_comp, weights = ~w, vcov = ~cluster)
+expect_iv_match(db_comp_cl, fx_comp_cl, tol_coef = tol, tol_se = tol_cluster, label = "Compressed IV cluster")
+
+sql_comp = invisible(capture.output(
+  dbivreg(
+    y ~ x | d ~ z,
+    data = dat_comp,
+    strategy = "compress",
+    sql_only = TRUE
+  )
+))
+expect_true(any(grepl("compressed AS", sql_comp)), info = "Compressed IV sql_only exposes compressed CTE")
+
+set.seed(2027)
+
+n_fe = 700L
+dat_fe1 = data.frame(
+  fe1 = factor(sample(1:25, n_fe, replace = TRUE)),
+  cluster = factor(sample(1:35, n_fe, replace = TRUE))
+)
+
+alpha = rnorm(25)[dat_fe1$fe1]
+dat_fe1$z = rnorm(n_fe)
+dat_fe1$x = rnorm(n_fe)
+v = rnorm(n_fe)
+dat_fe1$d = 0.8 * dat_fe1$z + 0.4 * dat_fe1$x + alpha + v
+dat_fe1$y = 2.0 * dat_fe1$d + 0.3 * dat_fe1$x + alpha + 0.6 * v + rnorm(n_fe)
+dat_fe1$w = runif(n_fe, min = 0.5, max = 2.0)
+
+db_fe1 = dbivreg(
+  y ~ x | fe1 | d ~ z,
+  data = dat_fe1,
+  weights = "w",
+  vcov = "hc1",
+  strategy = "demean"
+)
+fx_fe1 = feols(y ~ x | fe1 | d ~ z, data = dat_fe1, weights = ~w, vcov = "hc1")
+expect_iv_match(db_fe1, fx_fe1, tol_coef = tol, tol_se = tol, label = "One-way FE HC1")
+
+db_fe1_within = dbivreg(
+  y ~ x | fe1 | d ~ z,
+  data = dat_fe1,
+  weights = "w",
+  vcov = "iid",
+  strategy = "within"
+)
+fx_fe1_iid = feols(y ~ x | fe1 | d ~ z, data = dat_fe1, weights = ~w, vcov = "iid")
+expect_iv_match(db_fe1_within, fx_fe1_iid, tol_coef = tol, tol_se = tol, label = "Within alias")
+expect_true(db_fe1_within$strategy == "demean", info = "Within alias normalizes to demean")
+
+expect_true(
+  dbivreg(
+    y ~ x | fe1 | d ~ z,
+    data = dat_fe1,
+    weights = "w",
+    vcov = "iid",
+    strategy = "auto"
+  )$strategy == "demean",
+  info = "Auto strategy selects demean when fixed effects are present"
+)
+
+set.seed(2028)
+
+n_units = 20L
+n_time = 5L
+dat_fe2 = expand.grid(unit = 1:n_units, time = 1:n_time)
+dat_fe2$fe1 = factor(dat_fe2$unit)
+dat_fe2$fe2 = factor(dat_fe2$time)
+
+alpha_2 = rnorm(n_units)[dat_fe2$unit]
+tau_2 = rnorm(n_time)[dat_fe2$time]
+dat_fe2$z = rnorm(nrow(dat_fe2))
+dat_fe2$x = rnorm(nrow(dat_fe2))
+v_2 = rnorm(nrow(dat_fe2))
+dat_fe2$d = 0.9 * dat_fe2$z + 0.4 * dat_fe2$x + alpha_2 + tau_2 + v_2
+dat_fe2$y = 2.0 * dat_fe2$d + 0.3 * dat_fe2$x + alpha_2 + tau_2 + 0.5 * v_2 + rnorm(nrow(dat_fe2))
+
+db_fe2 = dbivreg(
+  y ~ x | fe1 + fe2 | d ~ z,
+  data = dat_fe2,
+  vcov = "iid",
+  strategy = "demean"
+)
+fx_fe2 = feols(y ~ x | fe1 + fe2 | d ~ z, data = dat_fe2, vcov = "iid")
+expect_iv_match(db_fe2, fx_fe2, tol_coef = tol, tol_se = tol, label = "Two-way FE balanced IID")
+fs_fe2 = fitstat(fx_fe2, ~ ivf1 + ivwald1, simplify = TRUE)
+expect_equal(
+  db_fe2$diagnostics$first_stage_f$d$stat,
+  extract_fixest_ivstat(fs_fe2, "ivf1", "d")$stat,
+  tolerance = tol,
+  info = "Two-way FE first-stage F matches fixest"
+)
+
+set.seed(2029)
+
+n_units_ap = 30L
+n_time_ap = 6L
+dat_ap = expand.grid(unit = 1:n_units_ap, time = 1:n_time_ap)
+dat_ap$fe1 = factor(dat_ap$unit)
+dat_ap$fe2 = factor(dat_ap$time)
+dat_ap$cluster = factor(sample(1:30, nrow(dat_ap), replace = TRUE))
+
+alpha_ap = rnorm(n_units_ap)[dat_ap$unit]
+tau_ap = rnorm(n_time_ap)[dat_ap$time]
+dat_ap$z = rnorm(nrow(dat_ap))
+dat_ap$x = rnorm(nrow(dat_ap))
+v_ap = rnorm(nrow(dat_ap))
+dat_ap$d = 0.8 * dat_ap$z + 0.5 * dat_ap$x + alpha_ap + tau_ap + v_ap
+dat_ap$y = 2.0 * dat_ap$d + 0.3 * dat_ap$x + alpha_ap + tau_ap + 0.6 * v_ap + rnorm(nrow(dat_ap))
+dat_ap$w = runif(nrow(dat_ap), min = 0.5, max = 2.0)
+
+drop_idx = sample(seq_len(nrow(dat_ap)), size = round(0.2 * nrow(dat_ap)))
+dat_ap = dat_ap[-drop_idx, ]
+
+db_ap = dbivreg(
+  y ~ x | fe1 + fe2 | d ~ z,
+  data = dat_ap,
+  weights = "w",
+  vcov = ~cluster,
+  strategy = "demean"
+)
+fx_ap = feols(y ~ x | fe1 + fe2 | d ~ z, data = dat_ap, weights = ~w, vcov = ~cluster)
+expect_iv_match(db_ap, fx_ap, tol_coef = tol, tol_se = tol_cluster, label = "Two-way FE AP cluster")
+fs_ap = fitstat(fx_ap, ~ ivf1 + ivwald1, simplify = TRUE)
+expect_equal(
+  db_ap$diagnostics$first_stage_wald$d$stat,
+  extract_fixest_ivstat(fs_ap, "ivwald1", "d")$stat,
+  tolerance = tol_cluster,
+  info = "Two-way FE cluster first-stage Wald matches fixest"
+)
+
+set.seed(2033)
+
+n_units_fe3 = 18L
+n_time_fe3 = 4L
+n_market_fe3 = 3L
+dat_fe3 = expand.grid(unit = 1:n_units_fe3, time = 1:n_time_fe3, market = 1:n_market_fe3)
+dat_fe3$fe1 = factor(dat_fe3$unit)
+dat_fe3$fe2 = factor(dat_fe3$time)
+dat_fe3$fe3 = factor(dat_fe3$market)
+
+alpha_3 = rnorm(n_units_fe3)[dat_fe3$unit]
+tau_3 = rnorm(n_time_fe3)[dat_fe3$time]
+gamma_3 = rnorm(n_market_fe3)[dat_fe3$market]
+dat_fe3$z = rnorm(nrow(dat_fe3))
+dat_fe3$x = rnorm(nrow(dat_fe3))
+v_3 = rnorm(nrow(dat_fe3))
+dat_fe3$d = 0.8 * dat_fe3$z + 0.4 * dat_fe3$x + alpha_3 + tau_3 + gamma_3 + v_3
+dat_fe3$y = 1.9 * dat_fe3$d + 0.25 * dat_fe3$x + alpha_3 + tau_3 + gamma_3 +
+  0.5 * v_3 + rnorm(nrow(dat_fe3))
+
+db_fe3 = dbivreg(
+  y ~ x | fe1 + fe2 + fe3 | d ~ z,
+  data = dat_fe3,
+  vcov = "iid",
+  strategy = "demean"
+)
+fx_fe3 = feols(y ~ x | fe1 + fe2 + fe3 | d ~ z, data = dat_fe3, vcov = "iid")
+expect_iv_match(db_fe3, fx_fe3, tol_coef = tol, tol_se = tol, label = "Three-way FE AP IID")
+fs_fe3 = fitstat(fx_fe3, ~ ivf1, simplify = TRUE)
+expect_equal(
+  db_fe3$diagnostics$first_stage_f$d$stat,
+  extract_fixest_ivstat(fs_fe3, "ivf1", "d")$stat,
+  tolerance = tol,
+  info = "Three-way FE first-stage F matches fixest"
+)
+expect_equal(
+  db_fe3$n_fe_levels,
+  c(fe1 = n_units_fe3, fe2 = n_time_fe3, fe3 = n_market_fe3),
+  info = "Three-way FE levels are retained in the fitted object"
+)
+expect_equal(
+  db_fe3$n_fe3,
+  n_market_fe3,
+  info = "Third fixed-effect count is available by position"
+)
+
+set.seed(2032)
+
+n_over = 700L
+dat_over = data.frame(
+  x = rnorm(n_over),
+  z1 = rnorm(n_over),
+  z2 = rnorm(n_over),
+  cluster = factor(sample(1:50, n_over, replace = TRUE))
+)
+v_over = rnorm(n_over)
+dat_over$d = 0.8 * dat_over$z1 + 0.3 * dat_over$z2 + 0.4 * dat_over$x + v_over
+dat_over$y = 1.0 + 2.0 * dat_over$d + 0.4 * dat_over$x + 0.6 * v_over + rnorm(n_over)
+
+db_over = dbivreg(
+  y ~ x | d ~ z1 + z2,
+  data = dat_over,
+  vcov = "iid",
+  strategy = "moments"
+)
+fx_over = feols(y ~ x | d ~ z1 + z2, data = dat_over, vcov = "iid")
+expect_iv_match(db_over, fx_over, tol_coef = tol, tol_se = tol, label = "Overidentified IID")
+fs_over = fitstat(fx_over, ~ ivf1 + sargan, simplify = TRUE)
+expect_equal(
+  db_over$diagnostics$first_stage_f$d$stat,
+  extract_fixest_ivstat(fs_over, "ivf1", "d")$stat,
+  tolerance = tol,
+  info = "Overidentified first-stage F matches fixest"
+)
+expect_equal(
+  db_over$diagnostics$overid$stat,
+  fs_over$sargan$stat,
+  tolerance = tol,
+  info = "Sargan statistic matches fixest"
+)
+expect_equal(
+  db_over$diagnostics$overid$p,
+  fs_over$sargan$p,
+  tolerance = tol,
+  info = "Sargan p-value matches fixest"
+)
+expect_true(
+  identical(db_over$diagnostics$overid$test, "Sargan"),
+  info = "IID overidentification test is labeled Sargan"
+)
+
+db_over_hc1 = dbivreg(
+  y ~ x | d ~ z1 + z2,
+  data = dat_over,
+  vcov = "hc1",
+  strategy = "moments"
+)
+Z_over = cbind("(Intercept)" = 1, x = dat_over$x, z1 = dat_over$z1, z2 = dat_over$z2)
+X_over = cbind("(Intercept)" = 1, x = dat_over$x, d = dat_over$d)
+u_over_hc1 = as.numeric(dat_over$y - X_over %*% coef(db_over_hc1)[colnames(X_over)])
+expect_equal(
+  db_over_hc1$diagnostics$overid$stat,
+  manual_hansen_j(Z_over, u_over_hc1),
+  tolerance = tol,
+  info = "HC1 Hansen J matches manual calculation"
+)
+expect_true(
+  identical(db_over_hc1$diagnostics$overid$test, "Hansen J"),
+  info = "HC1 overidentification test is labeled Hansen J"
+)
+
+db_over_cl = dbivreg(
+  y ~ x | d ~ z1 + z2,
+  data = dat_over,
+  vcov = ~cluster,
+  strategy = "moments"
+)
+u_over_cl = as.numeric(dat_over$y - X_over %*% coef(db_over_cl)[colnames(X_over)])
+expect_equal(
+  db_over_cl$diagnostics$overid$stat,
+  manual_hansen_j(Z_over, u_over_cl, cluster = dat_over$cluster),
+  tolerance = tol_cluster,
+  info = "Cluster-robust Hansen J matches manual calculation"
+)
+expect_true(
+  identical(db_over_cl$diagnostics$overid$test, "Hansen J"),
+  info = "Cluster overidentification test is labeled Hansen J"
+)
+
+set.seed(2030)
+
+n_fac = 900L
+dat_fac = data.frame(
+  g = factor(sample(letters[1:3], n_fac, replace = TRUE))
+)
+g_b = as.numeric(dat_fac$g == "b")
+g_c = as.numeric(dat_fac$g == "c")
+dat_fac$x = rnorm(n_fac)
+dat_fac$z = rnorm(n_fac)
+v_fac = rnorm(n_fac)
+dat_fac$d = 0.8 * dat_fac$z + 0.4 * dat_fac$x + 0.6 * g_b - 0.3 * g_c +
+  0.5 * dat_fac$z * g_b - 0.4 * dat_fac$z * g_c + v_fac
+dat_fac$y = 1.0 + 1.8 * dat_fac$d + 0.9 * dat_fac$d * g_b - 0.7 * dat_fac$d * g_c +
+  0.3 * dat_fac$x + 0.5 * g_b - 0.2 * g_c + 0.7 * v_fac + rnorm(n_fac)
+
+db_fac = dbivreg(
+  y ~ x + g | d + d:g ~ z + z:g,
+  data = dat_fac,
+  vcov = "hc1",
+  strategy = "moments"
+)
+fx_fac = feols(y ~ x + g | d + d:g ~ z + z:g, data = dat_fac, vcov = "hc1")
+expect_iv_match(db_fac, fx_fac, tol_coef = tol, tol_se = tol, label = "Factor-expanded IV moments")
+fs_fac = fitstat(fx_fac, ~ ivf1 + ivwald1, simplify = TRUE)
+for (nm in names(db_fac$diagnostics$first_stage_wald)) {
+  expect_equal(
+    db_fac$diagnostics$first_stage_wald[[nm]]$stat,
+    extract_fixest_ivstat(fs_fac, "ivwald1", nm)$stat,
+    tolerance = tol_cluster,
+    info = paste("Factor-expanded first-stage Wald matches fixest for", nm)
+  )
+}
+
+sql_fac = invisible(capture.output(
+  dbivreg(
+    y ~ x + g | d + d:g ~ z + z:g,
+    data = dat_fac,
+    sql_only = TRUE
+  )
+))
+expect_true(any(grepl("CASE WHEN", sql_fac)), info = "sql_only expands factor IV terms in SQL")
+
+set.seed(2031)
+
+n_fac_fe = 850L
+dat_fac_fe = data.frame(
+  g = factor(sample(letters[1:3], n_fac_fe, replace = TRUE)),
+  fe = factor(sample(1:30, n_fac_fe, replace = TRUE))
+)
+gfe_b = as.numeric(dat_fac_fe$g == "b")
+gfe_c = as.numeric(dat_fac_fe$g == "c")
+alpha_fe = rnorm(30)[dat_fac_fe$fe]
+dat_fac_fe$x = rnorm(n_fac_fe)
+dat_fac_fe$z = rnorm(n_fac_fe)
+v_fac_fe = rnorm(n_fac_fe)
+dat_fac_fe$d = 0.7 * dat_fac_fe$z + 0.3 * dat_fac_fe$x + 0.5 * gfe_b - 0.2 * gfe_c +
+  0.4 * dat_fac_fe$z * gfe_b - 0.3 * dat_fac_fe$z * gfe_c + alpha_fe + v_fac_fe
+dat_fac_fe$y = 1.5 * dat_fac_fe$d + 0.7 * dat_fac_fe$d * gfe_b - 0.5 * dat_fac_fe$d * gfe_c +
+  0.4 * dat_fac_fe$x + 0.4 * gfe_b - 0.1 * gfe_c + alpha_fe + 0.6 * v_fac_fe + rnorm(n_fac_fe)
+
+db_fac_fe = dbivreg(
+  y ~ x + g | fe | d + d:g ~ z + z:g,
+  data = dat_fac_fe,
+  vcov = "iid",
+  strategy = "demean"
+)
+fx_fac_fe = feols(y ~ x + g | fe | d + d:g ~ z + z:g, data = dat_fac_fe, vcov = "iid")
+expect_iv_match(db_fac_fe, fx_fac_fe, tol_coef = tol, tol_se = tol, label = "Factor-expanded IV demean")
+
+expect_error(
+  dbivreg(y ~ x | d ~ z, data = dat, endog = ~d, instruments = ~z),
+  "no longer supported"
+)
+
+expect_error(
+  dbivreg(y ~ x | fe1, data = dat_fe1),
+  "fixest-style IV formula|IV specification|IV block"
+)
+
+expect_error(
+  dbivreg(y ~ x | d ~ z, data = dat, strategy = "mundlak"),
+  "does not yet implement strategy = 'mundlak'"
+)
+
+expect_error(
+  dbivreg(y ~ x | fe1 | d ~ z, data = dat_fe1, strategy = "compress"),
+  "only available without fixed effects"
+)
+
+expect_error(
+  dbivreg(y ~ x | fe1 | d ~ z, data = dat_fe1, strategy = "moments"),
+  "only available without fixed effects"
+)
+
+expect_error(
+  dbivreg(y ~ x | d ~ z, data = dat, strategy = "demean"),
+  "requires at least one fixed effect"
+)
diff --git a/inst/tinytest/test_dbivreg_nyc.R b/inst/tinytest/test_dbivreg_nyc.R
new file mode 100644
index 0000000..693d939
--- /dev/null
+++ b/inst/tinytest/test_dbivreg_nyc.R
@@ -0,0 +1,112 @@
+# NYC Taxi tests (large data smoke + backend validation)
+# -----------------------------------------------------------------------------
+
+if (!tolower(Sys.getenv("DBREG_TEST_NYC")) %in% c("true", "1")) {
+  exit_file("Run `Sys.setenv(DBREG_TEST_NYC = 'TRUE')` to enable NYC taxi tests")
+}
+
+nyc_path = here::here("nyc-taxi/year=2012")
+if (!dir.exists(nyc_path)) {
+  exit_file("NYC taxi data not found at nyc-taxi/year=2012")
+}
+
+library(dbreg)
+library(DBI)
+library(duckdb)
+library(fixest)
+
+rename_fixest_iv = function(x) {
+  names(x) = sub("^fit_", "", names(x))
+  x
+}
+
+con = dbConnect(duckdb::duckdb(), shutdown = TRUE)
+
+dbExecute(con, sprintf("
+  CREATE VIEW nyc_iv AS
+  SELECT
+    trip_distance AS x,
+    fare_amount / 10.0 AS z,
+    passenger_count AS fe1,
+    dayofweek(dropoff_datetime) AS fe2,
+    5.0 * (fare_amount / 10.0) + 0.5 * trip_distance +
+      0.8 * passenger_count + 0.2 * dayofweek(dropoff_datetime) + tip_amount / 10.0 AS d,
+    1.0 + 2.0 * (
+      5.0 * (fare_amount / 10.0) + 0.5 * trip_distance +
+        0.8 * passenger_count + 0.2 * dayofweek(dropoff_datetime) + tip_amount / 10.0
+    ) +
+      0.3 * trip_distance + 0.8 * passenger_count +
+      0.2 * dayofweek(dropoff_datetime) + tip_amount / 20.0 AS y,
+    1.0 + passenger_count / 10.0 AS w
+  FROM read_parquet('%s/month=1/*.parquet')
+  WHERE trip_distance > 0 AND trip_distance < 20
+    AND fare_amount > 0 AND fare_amount < 100
+    AND passenger_count > 0
+  LIMIT 30000
+", nyc_path))
+
+nyc_local = dbGetQuery(con, "SELECT * FROM nyc_iv")
+
+nyc_iv = dbivreg(
+  y ~ x | d ~ z,
+  conn = con,
+  table = "nyc_iv",
+  weights = "w",
+  vcov = "hc1"
+)
+
+expect_true(
+  inherits(nyc_iv, "dbivreg"),
+  info = "dbivreg returns dbivreg object on NYC backend data"
+)
+expect_true(
+  all(is.finite(nyc_iv$coeftable[, c("estimate", "std.error")])),
+  info = "dbivreg returns finite coefficients and standard errors on NYC backend data"
+)
+
+fx_iv = feols(y ~ x | d ~ z, data = nyc_local, weights = ~w, vcov = "hc1")
+fx_coef = rename_fixest_iv(coef(fx_iv))
+expect_true(
+  max(abs(nyc_iv$coeftable[names(fx_coef), "estimate"] - fx_coef)) < 1e-6,
+  info = "dbivreg coefficients match fixest on NYC backend view"
+)
+
+fx_se = rename_fixest_iv(se(fx_iv))
+expect_true(
+  max(abs(nyc_iv$coeftable[names(fx_se), "std.error"] - fx_se)) < 1e-6,
+  info = "dbivreg HC1 standard errors match fixest on NYC backend view"
+)
+
+nyc_iv_fe = dbivreg(
+  y ~ x | fe1 + fe2 | d ~ z,
+  conn = con,
+  table = "nyc_iv",
+  weights = "w",
+  vcov = "hc1",
+  strategy = "auto"
+)
+
+expect_true(
+  nyc_iv_fe$strategy == "demean",
+  info = "dbivreg auto strategy selects demean on NYC FE-IV backend data"
+)
+expect_true(
+  all(is.finite(nyc_iv_fe$coeftable[, c("estimate", "std.error")])),
+  info = "dbivreg FE-IV returns finite coefficients and standard errors on NYC backend data"
+)
+
+fx_iv_fe = feols(y ~ x | fe1 + fe2 | d ~ z, data = nyc_local, weights = ~w, vcov = "hc1")
+fx_fe_coef = rename_fixest_iv(coef(fx_iv_fe))
+expect_true(
+  max(abs(nyc_iv_fe$coeftable[names(fx_fe_coef), "estimate"] - fx_fe_coef)) < 1e-6,
+  info = "dbivreg FE-IV coefficients match fixest on NYC backend view"
+)
+
+fx_fe_se = rename_fixest_iv(se(fx_iv_fe))
+expect_true(
+  max(abs(nyc_iv_fe$coeftable[names(fx_fe_se), "std.error"] - fx_fe_se)) < 1e-6,
+  info = "dbivreg FE-IV HC1 standard errors match fixest on NYC backend view"
+)
+
+dbDisconnect(con)
+rm(con)
diff --git a/man/dbivreg.Rd b/man/dbivreg.Rd
new file mode 100644
index 0000000..a886ee2
--- /dev/null
+++ b/man/dbivreg.Rd
@@ -0,0 +1,78 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dbivreg.R
+\name{dbivreg}
+\alias{dbivreg}
+\title{Instrumental-variables regression on a database backend}
+\usage{
+dbivreg(
+  fml,
+  conn = NULL,
+  table = NULL,
+  data = NULL,
+  path = NULL,
+  weights = NULL,
+  vcov = c("iid", "hc1"),
+  strategy = c("auto", "moments", "demean", "within", "compress", "mundlak"),
+  cluster = NULL,
+  sql_only = FALSE,
+  data_only = FALSE,
+  drop_missings = TRUE,
+  verbose = getOption("dbreg.verbose", FALSE),
+  ...
+)
+}
+\arguments{
+\item{fml}{A fixest-style IV formula. Use \code{y ~ x1 + x2 | d ~ z} for models
+without absorbed fixed effects, and \code{y ~ x1 + x2 | fe1 + fe2 + fe3 | d ~ z}
+when fixed effects are present. The IV block must always be the final
+pipe-separated block.}
+
+\item{conn}{Database connection, or \code{NULL} to create an ephemeral DuckDB
+connection.}
+
+\item{table, data, path}{Mutually exclusive data source arguments with
+precedence \verb{table > data > path}.}
+
+\item{weights}{Optional character string naming an observation-weight column.}
+
+\item{vcov}{Character string or one-sided clustering formula. Character
+options are \code{"iid"} and \code{"hc1"}.}
+
+\item{strategy}{Character string selecting the IV backend. Supported values
+are \code{"auto"} (default), \code{"moments"}, \code{"demean"} (alias
+\code{"within"}), \code{"compress"}, and \code{"mundlak"}. The latter currently
+errors because it is not yet implemented for IV.}
+
+\item{cluster}{Optional clustering variable, supplied as a one-sided formula
+or character string.}
+
+\item{sql_only}{Logical indicating whether to return only the underlying SQL
+query.}
+
+\item{data_only}{Logical indicating whether to return only the aggregated
+sufficient statistics.}
+
+\item{drop_missings}{Logical indicating whether incomplete cases should be
+dropped.}
+
+\item{verbose}{Logical indicating whether progress messages are printed.}
+
+\item{...}{Additional unused arguments. Passing legacy \code{endog} or
+\code{instruments} arguments now errors.}
+}
+\value{
+A list of class \code{"dbivreg"} and \code{"dbreg"}.
+}
+\description{
+Estimates a linear IV model via exact sufficient statistics on a database
+backend. The current implementation supports three exact strategies:
+\itemize{
+\item \code{strategy = "moments"} for models without absorbed fixed effects.
+\item \code{strategy = "compress"} for repeated no-FE design rows.
+\item \code{strategy = "demean"} for one or more absorbed fixed effects.
+}
+
+Multi-way fixed effects use exact alternating projections when there are
+more than two fixed effects, or when two-way fixed effects are unbalanced or
+weights are supplied.
+}
diff --git a/man/print.dbivreg.Rd b/man/print.dbivreg.Rd
new file mode 100644
index 0000000..48fb4ce
--- /dev/null
+++ b/man/print.dbivreg.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dbivreg.R
+\name{print.dbivreg}
+\alias{print.dbivreg}
+\title{Print method for dbivreg objects}
+\usage{
+\method{print}{dbivreg}(x, ...)
+}
+\arguments{
+\item{x}{A \code{dbivreg} object.}
+
+\item{...}{Additional unused arguments.}
+}
+\description{
+Print method for \code{dbivreg} objects.
+}