diff --git a/DESCRIPTION b/DESCRIPTION index 4c98cb4..b82219a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,6 @@ Imports: DelayedArray, DelayedMatrixStats, BiocParallel, - BiocNeighbors, stats, utils, Rcpp, @@ -28,8 +27,7 @@ Imports: LinkingTo: Rcpp, beachmat, - assorthead (>= 1.3.5), - BiocNeighbors + assorthead (>= 1.3.5) Suggests: testthat, knitr, diff --git a/NAMESPACE b/NAMESPACE index 106ad47..107fdec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,9 +30,6 @@ importClassesFrom(S4Vectors,DataFrame) importClassesFrom(S4Vectors,List) importClassesFrom(SummarizedExperiment,SummarizedExperiment) importFrom(BiocGenerics,cbind) -importFrom(BiocNeighbors,AnnoyParam) -importFrom(BiocNeighbors,KmknnParam) -importFrom(BiocNeighbors,defineBuilder) importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bpnworkers) importFrom(DelayedArray,DelayedArray) diff --git a/R/RcppExports.R b/R/RcppExports.R index 56bd22a..dc6ac18 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -19,18 +19,14 @@ set_executor <- function(ptr) { .Call('_SingleR_set_executor', PACKAGE = 'SingleR', ptr) } -train_integrated <- function(test_features, references, ref_features, labels, prebuilt, nthreads) { - .Call('_SingleR_train_integrated', PACKAGE = 'SingleR', test_features, references, ref_features, labels, prebuilt, nthreads) +train_integrated <- function(test_nrow, test_features, references, ref_features, labels, prebuilt, nthreads) { + .Call('_SingleR_train_integrated', PACKAGE = 'SingleR', test_nrow, test_features, references, ref_features, labels, prebuilt, nthreads) } #' @importFrom Rcpp sourceCpp #' @useDynLib SingleR -train_single <- function(test_features, ref, ref_features, labels, markers, builder, nthreads) { - .Call('_SingleR_train_single', PACKAGE = 'SingleR', test_features, ref, ref_features, labels, markers, builder, nthreads) -} - -get_ref_subset <- function(built) { - .Call('_SingleR_get_ref_subset', PACKAGE = 'SingleR', built) +train_single <- function(test_nrow, test_features, ref, ref_features, labels, markers, nthreads) { + .Call('_SingleR_train_single', PACKAGE = 'SingleR', test_nrow, test_features, ref, ref_features, labels, markers, nthreads) } is_valid_built <- function(built) { diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R index adf3d93..3ddb026 100644 --- a/R/combineRecomputedResults.R +++ b/R/combineRecomputedResults.R @@ -163,6 +163,7 @@ combineRecomputedResults <- function( # Applying the integration. ibuilt <- train_integrated( + test_nrow=length(test.genes), test_features=all.inter.test, references=lapply(trained, function(x) initializeCpp(x$ref, .check.na=FALSE)), ref_features=all.inter.ref, diff --git a/R/getClassicMarkers.R b/R/getClassicMarkers.R index 4cf9b86..c00f1f4 100644 --- a/R/getClassicMarkers.R +++ b/R/getClassicMarkers.R @@ -71,7 +71,10 @@ getClassicMarkers <- function(ref, labels, assay.type="logcounts", check.missing } common <- as.character(common) # avoid problems with NULL rownames for zero-row inputs. for (i in seq_along(ref)) { - ref[[i]] <- DelayedArray(ref[[i]])[common,,drop=FALSE] + # Use match() as this works with zero-row matrices that aren't allowed to have rownames, + # see discussion at https://mailman.stat.ethz.ch/pipermail/r-devel/2006-August/038893.html. + curmat <- ref[[i]] + ref[[i]] <- DelayedArray(curmat)[match(common, rownames(curmat)),,drop=FALSE] } blocks <- NULL diff --git a/R/rebuildIndices.R b/R/rebuildIndices.R index 99185ee..cc62b77 100644 --- a/R/rebuildIndices.R +++ b/R/rebuildIndices.R @@ -46,9 +46,9 @@ rebuildIndex <- function(trained, num.threads=1) { markers=trained$markers$full, labels=trained$labels$full, ulabels=trained$labels$unique, - BNPARAM=trained$options$BNPARAM, test.genes=trained$options$test.genes, - num.threads=num.threads) + num.threads=num.threads + )$index } trained } diff --git a/R/trainSingleR.R b/R/trainSingleR.R index 345ce87..ad3f789 100644 --- a/R/trainSingleR.R +++ b/R/trainSingleR.R @@ -43,7 +43,7 @@ #' if \code{ref} is a \link[SummarizedExperiment]{SummarizedExperiment} object (or is a list that contains one or more such objects). #' @param check.missing Logical scalar indicating whether rows should be checked for missing values. #' If true and any missing values are found, the rows containing these values are silently removed. -#' @param BNPARAM A \link[BiocNeighbors]{BiocNeighborParam} object specifying how the neighbor search index should be constructed. +#' @param BNPARAM Deprecated and ignored. #' @param approximate Deprecated, use \code{BNPARAM} instead. #' @param num.threads Integer scalar specifying the number of threads to use for index building. #' @param hint.sce Boolean indicating whether to print a hint to change \code{de.method=} when any entry of \code{ref} is a \link[SingleCellExperiment]{SingleCellExperiment}. @@ -196,7 +196,6 @@ #' #' @export #' @importFrom S4Vectors List isSingleString metadata metadata<- -#' @importFrom BiocNeighbors defineBuilder AnnoyParam KmknnParam #' @importFrom BiocParallel SerialParam #' @importFrom S4Vectors List #' @importFrom SummarizedExperiment assay @@ -238,14 +237,6 @@ trainSingleR <- function( stop("list-like 'genes' should be the same length as 'ref'") } - if (is.null(BNPARAM)) { - if (approximate) { - BNPARAM <- AnnoyParam() - } else { - BNPARAM <- KmknnParam() - } - } - output <- vector("list", length(ref)) names(output) <- names(ref) for (l in seq_along(ref)) { @@ -309,16 +300,15 @@ trainSingleR <- function( ulabels=ulabels, test.genes=test.genes, markers=markers, - BNPARAM=BNPARAM, num.threads=num.threads ) output[[l]] <- List( - built = built, + built = built$index, ref = curref, labels = list(full = curlabels, unique = ulabels), - markers = list(full = markers, unique = rownames(curref)[get_ref_subset(built) + 1]), - options = list(BNPARAM = BNPARAM, test.genes = test.genes) + markers = list(full = markers, unique = rownames(curref)[built$ref_subset + 1]), + options = list(test.genes = test.genes) ) } @@ -370,7 +360,7 @@ trainSingleR <- function( } #' @importFrom beachmat initializeCpp -.build_index <- function(ref, labels, ulabels, markers, test.genes, BNPARAM, num.threads) { +.build_index <- function(ref, labels, ulabels, markers, test.genes, num.threads) { for (m in seq_along(markers)) { current <- markers[[m]] for (n in seq_along(current)) { @@ -384,22 +374,23 @@ trainSingleR <- function( } if (is.null(test.genes)) { + test.nrow <- nrow(ref) test.genes <- ref.genes <- seq_len(nrow(ref)) } else { + test.nrow <- length(test.genes) intersection <- .create_intersection(test.genes, rownames(ref)) test.genes <- intersection$test ref.genes <- intersection$reference } - builder <- defineBuilder(BNPARAM) parsed <- initializeCpp(ref, .check.na=FALSE) train_single( + test_nrow=test.nrow, test_features=test.genes - 1L, ref=parsed, ref_features=ref.genes - 1L, labels=match(labels, ulabels) - 1L, markers=markers, - builder=builder$builder, nthreads=num.threads ) } diff --git a/man/trainSingleR.Rd b/man/trainSingleR.Rd index f9f7258..de59c1c 100644 --- a/man/trainSingleR.Rd +++ b/man/trainSingleR.Rd @@ -93,7 +93,7 @@ It will also suggest setting \code{aggr.ref=TRUE} for greater efficiency when \c \item{num.threads}{Integer scalar specifying the number of threads to use for index building.} -\item{BNPARAM}{A \link[BiocNeighbors]{BiocNeighborParam} object specifying how the neighbor search index should be constructed.} +\item{BNPARAM}{Deprecated and ignored.} \item{BPPARAM}{A \link[BiocParallel]{BiocParallelParam} object specifying how parallelization should be performed when \code{check.missing = TRUE}.} } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 932794c..37b20be 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -67,43 +67,34 @@ BEGIN_RCPP END_RCPP } // train_integrated -SEXP train_integrated(Rcpp::List test_features, Rcpp::List references, Rcpp::List ref_features, Rcpp::List labels, Rcpp::List prebuilt, int nthreads); -RcppExport SEXP _SingleR_train_integrated(SEXP test_featuresSEXP, SEXP referencesSEXP, SEXP ref_featuresSEXP, SEXP labelsSEXP, SEXP prebuiltSEXP, SEXP nthreadsSEXP) { +SEXP train_integrated(int test_nrow, Rcpp::List test_features, Rcpp::List references, Rcpp::List ref_features, Rcpp::List labels, Rcpp::List prebuilt, int nthreads); +RcppExport SEXP _SingleR_train_integrated(SEXP test_nrowSEXP, SEXP test_featuresSEXP, SEXP referencesSEXP, SEXP ref_featuresSEXP, SEXP labelsSEXP, SEXP prebuiltSEXP, SEXP nthreadsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; + Rcpp::traits::input_parameter< int >::type test_nrow(test_nrowSEXP); Rcpp::traits::input_parameter< Rcpp::List >::type test_features(test_featuresSEXP); Rcpp::traits::input_parameter< Rcpp::List >::type references(referencesSEXP); Rcpp::traits::input_parameter< Rcpp::List >::type ref_features(ref_featuresSEXP); Rcpp::traits::input_parameter< Rcpp::List >::type labels(labelsSEXP); Rcpp::traits::input_parameter< Rcpp::List >::type prebuilt(prebuiltSEXP); Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP); - rcpp_result_gen = Rcpp::wrap(train_integrated(test_features, references, ref_features, labels, prebuilt, nthreads)); + rcpp_result_gen = Rcpp::wrap(train_integrated(test_nrow, test_features, references, ref_features, labels, prebuilt, nthreads)); return rcpp_result_gen; END_RCPP } // train_single -SEXP train_single(Rcpp::IntegerVector test_features, Rcpp::RObject ref, Rcpp::IntegerVector ref_features, Rcpp::IntegerVector labels, Rcpp::List markers, Rcpp::RObject builder, int nthreads); -RcppExport SEXP _SingleR_train_single(SEXP test_featuresSEXP, SEXP refSEXP, SEXP ref_featuresSEXP, SEXP labelsSEXP, SEXP markersSEXP, SEXP builderSEXP, SEXP nthreadsSEXP) { +SEXP train_single(int test_nrow, Rcpp::IntegerVector test_features, Rcpp::RObject ref, Rcpp::IntegerVector ref_features, Rcpp::IntegerVector labels, Rcpp::List markers, int nthreads); +RcppExport SEXP _SingleR_train_single(SEXP test_nrowSEXP, SEXP test_featuresSEXP, SEXP refSEXP, SEXP ref_featuresSEXP, SEXP labelsSEXP, SEXP markersSEXP, SEXP nthreadsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; + Rcpp::traits::input_parameter< int >::type test_nrow(test_nrowSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type test_features(test_featuresSEXP); Rcpp::traits::input_parameter< Rcpp::RObject >::type ref(refSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ref_features(ref_featuresSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type labels(labelsSEXP); Rcpp::traits::input_parameter< Rcpp::List >::type markers(markersSEXP); - Rcpp::traits::input_parameter< Rcpp::RObject >::type builder(builderSEXP); Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP); - rcpp_result_gen = Rcpp::wrap(train_single(test_features, ref, ref_features, labels, markers, builder, nthreads)); - return rcpp_result_gen; -END_RCPP -} -// get_ref_subset -Rcpp::IntegerVector get_ref_subset(SEXP built); -RcppExport SEXP _SingleR_get_ref_subset(SEXP builtSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::traits::input_parameter< SEXP >::type built(builtSEXP); - rcpp_result_gen = Rcpp::wrap(get_ref_subset(built)); + rcpp_result_gen = Rcpp::wrap(train_single(test_nrow, test_features, ref, ref_features, labels, markers, nthreads)); return rcpp_result_gen; END_RCPP } @@ -123,9 +114,8 @@ static const R_CallMethodDef CallEntries[] = { {"_SingleR_classify_single", (DL_FUNC) &_SingleR_classify_single, 6}, {"_SingleR_find_classic_markers", (DL_FUNC) &_SingleR_find_classic_markers, 6}, {"_SingleR_set_executor", (DL_FUNC) &_SingleR_set_executor, 1}, - {"_SingleR_train_integrated", (DL_FUNC) &_SingleR_train_integrated, 6}, + {"_SingleR_train_integrated", (DL_FUNC) &_SingleR_train_integrated, 7}, {"_SingleR_train_single", (DL_FUNC) &_SingleR_train_single, 7}, - {"_SingleR_get_ref_subset", (DL_FUNC) &_SingleR_get_ref_subset, 1}, {"_SingleR_is_valid_built", (DL_FUNC) &_SingleR_is_valid_built, 1}, {NULL, NULL, 0} }; diff --git a/src/classify_single.cpp b/src/classify_single.cpp index 56275c1..26d820e 100644 --- a/src/classify_single.cpp +++ b/src/classify_single.cpp @@ -7,7 +7,7 @@ //[[Rcpp::export(rng=false)]] SEXP classify_single(Rcpp::RObject test, SEXP prebuilt, double quantile, bool use_fine_tune, double fine_tune_threshold, int nthreads) { Rtatami::BoundNumericPointer parsed(test); - TrainedSingleIntersectPointer built(prebuilt); + TrainedSinglePointer built(prebuilt); // Setting up outputs. size_t ncells = parsed->ptr->ncol(); @@ -34,7 +34,7 @@ SEXP classify_single(Rcpp::RObject test, SEXP prebuilt, double quantile, bool us opts.quantile = quantile; opts.fine_tune = use_fine_tune; opts.fine_tune_threshold = fine_tune_threshold; - singlepp::classify_single_intersect(*(parsed->ptr), *built, buffers, opts); + singlepp::classify_single(*(parsed->ptr), *built, buffers, opts); return Rcpp::List::create( Rcpp::Named("best") = best, diff --git a/src/train_integrated.cpp b/src/train_integrated.cpp index 259d9c5..8c5d949 100644 --- a/src/train_integrated.cpp +++ b/src/train_integrated.cpp @@ -4,7 +4,15 @@ #include //[[Rcpp::export(rng=false)]] -SEXP train_integrated(Rcpp::List test_features, Rcpp::List references, Rcpp::List ref_features, Rcpp::List labels, Rcpp::List prebuilt, int nthreads) { +SEXP train_integrated( + int test_nrow, + Rcpp::List test_features, + Rcpp::List references, + Rcpp::List ref_features, + Rcpp::List labels, + Rcpp::List prebuilt, + int nthreads +) { size_t nrefs = references.size(); std::vector > inputs; @@ -29,9 +37,10 @@ SEXP train_integrated(Rcpp::List test_features, Rcpp::List references, Rcpp::Lis holding_labs[r] = labels[r]; Rcpp::RObject built = prebuilt[r]; - TrainedSingleIntersectPointer curbuilt(built); + TrainedSinglePointer curbuilt(built); - inputs.push_back(singlepp::prepare_integrated_input_intersect( + inputs.push_back(singlepp::prepare_integrated_input( + test_nrow, curinter, *(parsed->ptr), static_cast(holding_labs[r].begin()), diff --git a/src/train_single.cpp b/src/train_single.cpp index 4fba77f..f4441e9 100644 --- a/src/train_single.cpp +++ b/src/train_single.cpp @@ -1,5 +1,4 @@ #include "utils.h" -#include "BiocNeighbors.h" #include #include @@ -7,13 +6,18 @@ //' @importFrom Rcpp sourceCpp //' @useDynLib SingleR //[[Rcpp::export(rng=false)]] -SEXP train_single(Rcpp::IntegerVector test_features, Rcpp::RObject ref, Rcpp::IntegerVector ref_features, Rcpp::IntegerVector labels, Rcpp::List markers, Rcpp::RObject builder, int nthreads) { +SEXP train_single( + int test_nrow, + Rcpp::IntegerVector test_features, + Rcpp::RObject ref, + Rcpp::IntegerVector ref_features, + Rcpp::IntegerVector labels, + Rcpp::List markers, + int nthreads +) { + // We use all available markers; assume subsetting was applied on the R side. singlepp::TrainSingleOptions opts; opts.num_threads = nthreads; - opts.top = -1; // Use all available markers; assume subsetting was applied on the R side. - - BiocNeighbors::BuilderPointer bptr(builder); - opts.trainer = std::shared_ptr(std::shared_ptr{}, bptr.get()); // make a no-op shared pointer. Rtatami::BoundNumericPointer parsed(ref); int NR = parsed->ptr->nrow(); @@ -50,22 +54,21 @@ SEXP train_single(Rcpp::IntegerVector test_features, Rcpp::RObject ref, Rcpp::In } // Building the indices. - auto built = singlepp::train_single_intersect( + std::vector ref_subset; + auto built = singlepp::train_single( + test_nrow, inter, *(parsed->ptr), static_cast(labels.begin()), std::move(markers2), + &ref_subset, opts ); - return TrainedSingleIntersectPointer(new TrainedSingleIntersect(std::move(built)), true); -} - -//[[Rcpp::export(rng=false)]] -Rcpp::IntegerVector get_ref_subset(SEXP built) { - TrainedSingleIntersectPointer ptr(built); - const auto& rsub = ptr->get_ref_subset(); - return Rcpp::IntegerVector(rsub.begin(), rsub.end()); + return Rcpp::List::create( + Rcpp::Named("index") = TrainedSinglePointer(new TrainedSingle(std::move(built)), true), + Rcpp::Named("ref_subset") = Rcpp::IntegerVector(ref_subset.begin(), ref_subset.end()) + ); } //[[Rcpp::export(rng=false)]] diff --git a/src/utils.h b/src/utils.h index 3e9f97e..ec40e2a 100644 --- a/src/utils.h +++ b/src/utils.h @@ -5,9 +5,9 @@ #include "Rtatami.h" // before singlepp includes to ensure the tatami_r::parallelize() override is set. #include "singlepp/singlepp.hpp" -typedef singlepp::TrainedSingleIntersect TrainedSingleIntersect; +typedef singlepp::TrainedSingle TrainedSingle; -typedef Rcpp::XPtr TrainedSingleIntersectPointer; +typedef Rcpp::XPtr TrainedSinglePointer; typedef singlepp::TrainedIntegrated TrainedIntegrated;