microbiome · TuomasBorman · Sep 22, 2025 · Apr 20, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/R/miaTime.R b/R/miaTime.R
@@ -185,3 +185,46 @@ NULL
 #' @keywords
 #' data
 NULL
+
+#' @title Kumaraswamy2024
+#' @description
+#' The Kumaraswamy2024 data set includes microbiota and metabolite profiling
+#' data from 78 Indian individuals (40 males, 38 females).
+#' 
+#' The Indian subjects were grouped into four diet groups (~20 subjects per group), 
+#' and fecal samples were collected across three seasonal time points.
+#'
+#' Microbiota were profiled with HITChip microarray analysis (in duplicate)
+#' and qPCR (in triplicate with eight-point standard curves); metabolites
+#' were profiled with LC-HRMS and HPLC using internal standards.
+#'
+#' Column metadata includes diet group assignment, sampling season, sex, BMI, 
+#' age, and questionnaire-based lifestyle metadata.
+#'
+#' Quality control metrics include Pearson correlation (>0.98) for HITChip, 
+#' qPCR assay efficiency (>0.99), and technical replicates for HPLC and qPCR.
+#'
+#' Data sources:
+#' - Microbiota HITChip microarray data
+#' - qPCR absolute abundance data
+#' - Chemical profiling data (HPLC, LC-HRMS)
+#' - Sample metadata (diet, lifestyle)
+#'
+#' Processed and raw data are available via:
+#' - Zenodo (DOI: https://doi.org/10.5281/zenodo.14424024)
+#' - NCBI-SRA (fermented foods 16S rRNA sequencing, accession: PRJNA1191989)
+#'
+#' @name Kumaraswamy2024
+#' @docType data
+#' @author Jeyaram, K., Lahti, L., Tims, S. et al.
+#' @return Loads the data set in R.
+#' @references
+#' Jeyaram, K., Lahti, L., Tims, S. et al. (2025) Fermented foods affect the
+#' seasonal stability of gut bacteria in an Indian rural population.
+#' Nat Commun 16, 771. \url{https://doi.org/10.1038/s41467-025-56014-6}
+#' @usage data(Kumaraswamy2024)
+#' @format The data set in
+#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
+#' format.
+#' @keywords data
+NULL
diff --git a/data/Kumaraswamy2024.rda b/data/Kumaraswamy2024.rda
diff --git a/inst/scripts/Kumaraswamy2024.R b/inst/scripts/Kumaraswamy2024.R
@@ -0,0 +1,107 @@
+# Load required libraries
+library(TreeSummarizedExperiment)
+library(Cairo)
+library(dplyr)
+library(readxl)
+
+# Read Excel file `f` with read_excel() and return its contents as a matrix.
+# The first column supplies the row names and is dropped from the data; the
+# remaining columns are passed through as.matrix().
+read_data <- function(f) {
+    sheet <- read_excel(f)
+    # Flatten the first column to a bare vector of row identifiers
+    ids <- unname(unlist(sheet[, 1]))
+    mat <- as.matrix(sheet[, -1])
+    rownames(mat) <- ids
+    mat
+}
+# Get data from: https://zenodo.org/records/14424024
+# Abundance profiles (genus-, phylum- and oligo-level HITChip tables)
+gen <- read_data("../data/Genus_hitchip.xlsx")
+phy <- read_data("../data/Phylum_hitchip.xlsx")
+oli <- read_data("../data/Oligo_hitchip.xlsx")
+# Metadata; row names are taken from the "sample" column
+md <- read_data("../data/modified_file.xlsx")
+rownames(md) <- unname(md[, "sample"])
+md <- as.data.frame(md)
+# Columns 14:61 are coerced to logical by position (TODO confirm the range)
+md[14:61] <- lapply(md[14:61], as.logical)
+# Group-A: never consumed Hawaijar and Dahi (n=20, control)
+# Group-B: consume Hawaijar and Dahi (n=21)
+# Group-C: consume Hawaijar, not Dahi (n=23)
+# Group-D: consume Dahi, not Hawaijar (n=14)
+md[["timepoint"]] <- as.numeric(md[["timepoint"]])
+md[["season"]] <- factor(md[["season"]],
+                         levels=c("summer", "autumn", "winter"))
+# NOTE(review): age and bmi also become factors -- confirm this is intended
+factors <- c("age", "sex", "bmi", "clan", "nature_of_birth",
+             "marital_status", "residence", "subject", "group")
+md[factors] <- lapply(md[factors],
+                      function(v) factor(v, levels=sort(unique(v))))
+
+# Build the experiment: genus signal as main assay, phylum/oligo as altExps
+tse <- TreeSummarizedExperiment(
+    assays=SimpleList(signal=gen), colData=DataFrame(md))
+altExp(tse, "phylum") <- TreeSummarizedExperiment(
+    assays=SimpleList(signal=phy))
+altExp(tse, "oligo") <- TreeSummarizedExperiment(
+    assays=SimpleList(signal=oli))
+# The oligo table has one NA; replace it with the smallest observed value
+oligo <- assay(altExp(tse, "oligo"), "signal")
+oligo[is.na(oligo)] <- min(oligo, na.rm=TRUE)
+assay(altExp(tse, "oligo"), "signal") <- oligo
+
+# -------------------------------------------
+
+# Total load in LOG10_16S _RNA_gene copies_per_g
+# Sheets 6 and 8 have different sample names, so they are not read
+qpcr <- "../data/AbsoluteloadTaxaspecificqPCRdata.xlsx"
+tabs <- lapply(setdiff(1:11, c(6, 8)), function(i) read_excel(qpcr, sheet = i))
+d <- Reduce(function(a, b) dplyr::full_join(a, b, by="sample"), tabs)
+d <- data.frame(d)
+rownams <- unname(unlist(d[, "sample"]))
+d <- d[, setdiff(colnames(d), "sample"), drop=FALSE]
+# Turn the textual missing-value markers into NA and convert column by column;
+# `%in%` on a whole data.frame tests columns, and apply() goes via as.matrix().
+to_num <- function(v) as.numeric(replace(v, v %in% c("missing data", "NA"), NA))
+d <- vapply(d, to_num, numeric(nrow(d)))
+rownames(d) <- rownams
+altExp(tse, "total_loads") <- TreeSummarizedExperiment(
+    assays=SimpleList(signal=t(d)))
+
+# LC-HRMS fecal metabolites; row 3 of the imported table holds the headers
+x <- read_excel("../data/Fecal metabolite profile_LC-HRMS Data.xlsx", sheet = 1)
+colnams <- as.character(x[3, ])
+x <- x[-(1:3), ]
+colnames(x) <- colnams
+# Columns 1:5 go to rowData, the remaining columns are treated as samples
+xr <- as.data.frame(x[, 1:5])
+rownames(xr) <- paste0("feature_", seq_len(nrow(xr)))
+xd <- apply(as.matrix(x[, 6:ncol(x)]), 2, as.numeric)
+# Align with the samples of tse; samples without measurements stay NA
+M <- matrix(NA_real_, nrow=nrow(xd), ncol=ncol(tse),
+            dimnames=list(rownames(xr), colnames(tse)))
+M[, colnames(xd)] <- xd
+altExp(tse, "metabolites") <- TreeSummarizedExperiment(
+    assays=SimpleList(signal=M), rowData=DataFrame(xr, check.names=FALSE))
+
+# SCFA data (HPLC); row 1 of the imported table holds the real column headers
+x <- read_excel("../data/SCFA data-HPLC.xlsx")
+colnams <- unname(unlist(x[1, ]))
+x <- x[-1, ]
+colnames(x) <- colnams
+rownams <- x[["sample"]]
+# Every column except the sample identifier is converted to numeric
+scfa_names <- setdiff(colnams, "sample")
+scfa <- t(apply(as.matrix(x[, scfa_names]), 2, as.numeric))
+colnames(scfa) <- rownams
+# Align with the samples of tse; samples without measurements stay NA
+M <- matrix(NA_real_, nrow=nrow(scfa), ncol=ncol(tse),
+            dimnames=list(scfa_names, colnames(tse)))
+M[, colnames(scfa)] <- scfa
+altExp(tse, "scfa") <- TreeSummarizedExperiment(
+    assays=SimpleList(signal=M))
+
+# -----------------------------------------------------------------------------
+Kumaraswamy2024 <- tse  # the name that data(Kumaraswamy2024) must create
+save(Kumaraswamy2024, file="Kumaraswamy2024.rda")
diff --git a/man/Kumaraswamy2024.Rd b/man/Kumaraswamy2024.Rd