From 8178d5163138697868074f0c7183b43f0a248e90 Mon Sep 17 00:00:00 2001 From: AnneliektH Date: Wed, 18 Sep 2024 12:43:21 -0700 Subject: [PATCH 1/2] enable sample sorting by category --- src/sourmash_plugin_betterplot.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/sourmash_plugin_betterplot.py b/src/sourmash_plugin_betterplot.py index 83750e8..dd9cf08 100644 --- a/src/sourmash_plugin_betterplot.py +++ b/src/sourmash_plugin_betterplot.py @@ -855,6 +855,10 @@ def __init__(self, subparser): "--no-y-labels", action="store_true", help="disable Y axis labels" ) + subparser.add_argument( + "--sort-by-category", action="store_true", + help="Sort rows by category" + ) def main(self, args): super().main(args) @@ -864,14 +868,30 @@ def main(self, args): # pick out all the distinct queries/matches. notify(f"loaded {len(rows)} rows from '{args.manysearch_csv}'") - query_d = manysearch_rows_to_index(rows, column_name='query_name') - against_d = manysearch_rows_to_index(rows, column_name='match_name') + # optional sorting samples by category + if args.sort_by_category: + query_d = manysearch_rows_to_index(rows, column_name='query_name') + # make df for color input + df_col = pd.read_csv(args.row_categories_csv) + # make a df from query d + df_query = pd.DataFrame(list(query_d.items()), columns=['label', 'order']) + # merge, sort on category and reorder + df_col = df_col.merge(df_query, on='label') + df_col = df_col.sort_values(by=['category']) + df_col['order'] = range(len(df_col)) + # put reordered back into a dict + query_d = df_col.set_index('label')['order'].to_dict() + else: + query_d = manysearch_rows_to_index(rows, column_name='query_name') + + against_d = manysearch_rows_to_index(rows, column_name='match_name') notify(f"loaded {len(query_d)} x {len(against_d)} total elements") query_d_items = list(sorted(query_d.items(), key=lambda x: x[1])) against_d_items = list(sorted(against_d.items(), key=lambda x: x[1])) + mat = numpy.zeros((len(query_d), len(against_d))) colname = args.use_column @@ -910,6 +930,7 @@ def main(self, args): if args.boolean: # turn off colorbar if boolean. kw_args['cbar_pos'] = None + yticklabels=sample_d_to_idents(query_d_items) xticklabels=sample_d_to_idents(against_d_items) if args.no_labels: @@ -928,6 +949,7 @@ def main(self, args): vmin=args.vmin, vmax=args.vmax, col_colors=col_colors, + row_cluster=False, row_colors=row_colors, xticklabels=xticklabels, yticklabels=yticklabels, From 9850c1afa11e1896b3fdff16fd5fa762c0e56822 Mon Sep 17 00:00:00 2001 From: AnneliektH Date: Wed, 18 Sep 2024 13:13:31 -0700 Subject: [PATCH 2/2] enable sample sorting by category --- src/sourmash_plugin_betterplot.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sourmash_plugin_betterplot.py b/src/sourmash_plugin_betterplot.py index dd9cf08..1e9c4df 100644 --- a/src/sourmash_plugin_betterplot.py +++ b/src/sourmash_plugin_betterplot.py @@ -857,7 +857,7 @@ def __init__(self, subparser): ) subparser.add_argument( "--sort-by-category", action="store_true", - help="Sort rows by category" + help="Sort rows by category, instead of clustering them" ) def main(self, args): @@ -941,6 +941,11 @@ def main(self, args): elif args.no_y_labels: yticklabels = [] + if args.sort_by_category: + row_cluster=False + else: + row_cluster=True + # turn into dissimilarity matrix # plot! fig = sns.clustermap( @@ -949,7 +954,7 @@ def main(self, args): vmin=args.vmin, vmax=args.vmax, col_colors=col_colors, - row_cluster=False, + row_cluster=row_cluster, row_colors=row_colors, xticklabels=xticklabels, yticklabels=yticklabels,