Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions R/miaTime.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,46 @@ NULL
#' @keywords
#' data
NULL

#' @title Kumaraswamy2024
#' @description
#' The Kumaraswamy2024 data set includes microbiota and metabolite profiling
#' data from 78 Indian individuals (40 males, 38 females).
#'
#' The Indian subjects were grouped into four diet groups (~20 subjects per group),
#' and fecal samples were collected across three seasonal time points.
#'
#' Microbiota profiling was performed using HITChip microarray analysis
#' (in duplicate) and qPCR (in triplicate with eight-point standard curves).
#' Metabolites were profiled using LC-HRMS and HPLC with internal standards.
#'
#' Column metadata includes diet group assignment, sampling season, sex, BMI,
#' age, and questionnaire-based lifestyle metadata.
Comment thread
antagomir marked this conversation as resolved.
#'
#' Quality control metrics include Pearson correlation (>0.98) for HITChip,
#' qPCR assay efficiency (>0.99), and technical replicates for HPLC and qPCR.
#'
#' Data sources:
#' - Microbiota HITChip microarray data
#' - qPCR absolute abundance data
#' - Chemical profiling data (HPLC, LC-HRMS)
Comment thread
0xMuluh marked this conversation as resolved.
#' - Sample metadata (diet, lifestyle)
#'
#' Processed and raw data are available via:
#' - Zenodo (DOI: https://doi.org/10.5281/zenodo.14424024)
#' - NCBI-SRA (fermented foods 16S rRNA sequencing, accession: PRJNA1191989)
#'
#' @name Kumaraswamy2024
#' @docType data
#' @author Jeyaram, K., Lahti, L., Tims, S. et al.
#' @return Loads the data set in R.
#' @references
#' Jeyaram, K., Lahti, L., Tims, S. et al. Fermented foods affect the seasonal
#' stability of gut bacteria in an Indian rural population.
#' Nat Commun 16, 771 (2025). \url{https://doi.org/10.1038/s41467-025-56014-6}
#' @usage data(Kumaraswamy2024)
#' @format The data set in
#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
#' format.
#' @keywords data
NULL
Binary file added data/Kumaraswamy2024.rda
Binary file not shown.
107 changes: 107 additions & 0 deletions inst/scripts/Kumaraswamy2024.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Load required libraries
library(TreeSummarizedExperiment)
library(Cairo)
library(dplyr)
library(readxl)

# Helper: read an Excel sheet into a matrix keyed by its first column
#
# f: path to an Excel file, passed straight to read_excel().
# Returns the sheet without its first column, converted with as.matrix(),
# and with the values of the first column used as row names.
read_data <- function (f) {
  sheet <- read_excel(f)
  # The first column carries the row identifiers; unlist() flattens the
  # one-column table into a plain vector
  ids <- unname(unlist(sheet[, 1]))
  values <- as.matrix(sheet[, -1])
  rownames(values) <- ids
  values
}
# Get data from : https://zenodo.org/records/14424024
# Abundance profiles (one table per file: genus, phylum, oligo)
gen <- read_data("../data/Genus_hitchip.xlsx")
phy <- read_data("../data/Phylum_hitchip.xlsx")
oli <- read_data("../data/Oligo_hitchip.xlsx")

# Metadata
# read_data() returns a matrix; re-key it by the "sample" column before
# converting to a data.frame so that row names are the sample identifiers
md <- read_data("../data/modified_file.xlsx")
rownames(md) <- unname(md[, "sample"])
md <- as.data.frame(md)
# Columns 14-61 are converted to logical
# NOTE(review): presumably the yes/no questionnaire variables - confirm the
# column range against the metadata file
md[14:61] <- lapply(md[14:61], as.logical)
# Group-A: never consumed Hawaijar and Dahi (n=20, control)
# Group-B: consume Hawaijar and Dahi (n=21)
# Group-C: consume Hawaijar, not Dahi (n=23)
# Group-D: consume Dahi, not Hawaijar (n=14)
md[["timepoint"]] <- as.numeric(md[["timepoint"]])
# Seasons get an explicit level order. The argument must be spelled `levels`:
# factor() has no `...`, so a misspelled argument name is an error.
md[["season"]] <- factor(
  md[["season"]],
  levels = c("summer", "autumn", "winter")
)
# Remaining categorical variables: levels are the sorted unique values
factors <- c("age", "sex", "bmi", "clan", "nature_of_birth",
             "marital_status", "residence", "subject", "group")
for (f in factors) {
  md[[f]] <- factor(md[[f]], levels = sort(unique(md[[f]])))
}

# Build the main TreeSummarizedExperiment: `gen` is the "signal" assay and
# the sample metadata `md` becomes colData
tse <- TreeSummarizedExperiment(
  assays = SimpleList(signal = gen),
  colData = DataFrame(md)
)

# Attach the phylum and oligo tables as alternative experiments
altExp(tse, "phylum") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = phy)
)
altExp(tse, "oligo") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = oli)
)

# There is one NA in the oligo signal; substitute the smallest observed value
oligo_signal <- assay(altExp(tse, "oligo"), "signal")
oligo_signal[is.na(oligo_signal)] <- min(oligo_signal, na.rm = TRUE)
assay(altExp(tse, "oligo"), "signal") <- oligo_signal

# -------------------------------------------

# Total load in LOG10_16S _RNA_gene copies_per_g
# Sheets 6 and 8 have different sample names, so they are excluded from the
# join on "sample"
qpcr_file <- "../data/AbsoluteloadTaxaspecificqPCRdata.xlsx"
tabs <- lapply(1:11, function(i) read_excel(qpcr_file, sheet = i))
tabs <- tabs[-c(6, 8)]
d <- Reduce(
  function(dtf1, dtf2) dplyr::full_join(dtf1, dtf2, by = "sample"),
  tabs
)
d <- data.frame(d)
rownams <- unname(unlist(d[, "sample"]))
# Drop the identifier column by name rather than by position
d <- d[, setdiff(colnames(d), "sample"), drop = FALSE]
# Recode the textual missing-value markers and convert to numeric one column
# at a time. `%in%` applied to a whole data.frame compares entire columns
# (not cells), so the recode has to be done per column; converting per column
# also avoids routing numeric columns through as.matrix()/format().
missing_codes <- c("missing data", "NA")
d <- do.call(cbind, lapply(d, function(v) {
  v[v %in% missing_codes] <- NA
  as.numeric(v)
}))
rownames(d) <- rownams
# Samples are rows in `d`; transpose so that samples become columns
altExp(tse, "total_loads") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = t(d))
)

# 'Fecal metabolite profile_LC-HRMS Data.xlsx'
# The path is written as a plain string: "\ " is not a valid escape sequence
# in an R string literal and prevents the script from parsing.
x <- read_excel(
  "../data/Fecal metabolite profile_LC-HRMS Data.xlsx",
  sheet = 1
)
# Row 3 holds the column headers; rows 1-3 are then removed from the data
colnams <- as.character(x[3, ])
x <- x[-c(1, 2, 3), ]
colnames(x) <- colnams
# Columns 1-5 are kept as row annotation, the remaining columns are converted
# to numeric values. Row names are set on a base data.frame because setting
# them on a tibble is deprecated.
feature_ids <- paste0("feature_", seq_len(nrow(x)))
xr <- as.data.frame(x[, 1:5])
rownames(xr) <- feature_ids
xd <- apply(as.matrix(x[, 6:ncol(x)]), 2, as.numeric)
# Matrix with one column per sample in `tse`; samples without measurements
# stay NA
M <- matrix(NA_real_, nrow = nrow(xd), ncol = ncol(tse))
colnames(M) <- colnames(tse)
# Match samples
M[, colnames(xd)] <- xd
rownames(M) <- feature_ids
altExp(tse, "metabolites") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = M),
  rowData = DataFrame(xr, check.names = FALSE)
)

# 'SCFA data-HPLC.xlsx'
# The path is written as a plain string: "\ " is not a valid escape sequence
# in an R string literal and prevents the script from parsing.
x <- read_excel("../data/SCFA data-HPLC.xlsx")
# The first data row holds the actual column headers
colnams <- unname(unlist(x[1, ]))
x <- x[-1, ]
colnames(x) <- colnams
rownams <- x[["sample"]]
# Drop the first column and convert the remaining values to numeric
x <- apply(as.matrix(x[, -1]), 2, as.numeric)
# Transpose so that samples become columns
scfa <- t(x)
colnames(scfa) <- rownams
# Matrix with one column per sample in `tse`; samples without measurements
# stay NA
M <- matrix(NA_real_, nrow = nrow(scfa), ncol = ncol(tse))
colnames(M) <- colnames(tse)
M[, colnames(scfa)] <- scfa
rownames(M) <- colnams[-1]
altExp(tse, "scfa") <- TreeSummarizedExperiment(
  assays = SimpleList(signal = M)
)

# -----------------------------------------------------------------------------

# Store the assembled object in an .rda file in the working directory.
# NOTE(review): the object is saved under the name `tse`, so loading the file
# restores a variable called `tse` - confirm this is the intended name.
save(list = "tse", file = "Kumaraswamy2024.rda")
57 changes: 57 additions & 0 deletions man/Kumaraswamy2024.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading