From 960c1905057e97702d6582ebff3d0743f979db58 Mon Sep 17 00:00:00 2001 From: Luca Ferragina Date: Tue, 22 Jul 2025 20:26:29 +0200 Subject: [PATCH 1/5] Introduce new DQM plotter based on mplhep - Usage detailed by running dqm-plot -h - Defaults to CMS color palette with unique markers - Saves plots in png format by default (can also add pdf output) - Vertical space is added for legend padding. Legend entries are split into columns, compressed in camelCase if necessary or placed outside the pad in extreme cases - Supports up to 10 files to compare (more at your own risk) - Can optionally add a index.php file in all output subdirectories for web viewing - Set log scale automatically for most common cases (res, pT, ...) - Plots are produced as errorbars by default (use --histograms to override) - Grid is enabled by default on all plots, can be disabled with --no-grid - For summary plots, labels are extracted from root file and used in the comparison plots (all labels from all files are included) --- DQMServices/Components/scripts/dqm-plot | 1118 +++++++++++++++++++++++ 1 file changed, 1118 insertions(+) create mode 100755 DQMServices/Components/scripts/dqm-plot diff --git a/DQMServices/Components/scripts/dqm-plot b/DQMServices/Components/scripts/dqm-plot new file mode 100755 index 0000000000000..e27afe48d76fa --- /dev/null +++ b/DQMServices/Components/scripts/dqm-plot @@ -0,0 +1,1118 @@ +#!/usr/bin/env python3 + +""" +Simple DQM comparison plotter using mplhep with CMS styling. + +Usage: dqm-plot -s "DQMData/Run 1/HLT/Run summary/Muon/*" [options] $DQM_FILES +See all available options with `dqm-plot -h`. +""" + +import ROOT +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import mplhep as hep +import argparse +import os +import re +import multiprocessing +import warnings +import urllib.request +import urllib.error +from pathlib import Path + +# Suppress numpy warnings about subnormal float values +warnings.filterwarnings("ignore", category=UserWarning, module="numpy") + +# Set CMS style globally +plt.style.use(hep.style.CMS) + +# Use matplotlib's mathtext +plt.rcParams["mathtext.default"] = "regular" + + +class DQMPlotter: + def __init__(self, figsize=(12, 9), ratio_height=0.3, processors=None): + """ + Initialize the plotter. + + Args: + figsize: Figure size (width, height) in inches + ratio_height: Fraction of figure height for ratio plot + processors: Number of processors to use for multiprocessing (None for auto-detect) + """ + self.figsize = figsize + self.ratio_height = ratio_height + # Color palette from https://cms-analysis.docs.cern.ch/guidelines/plotting/colors/ + self.colors = [ + "#3f8fda", + "#ffa90e", + "#bd1f01", + "#94a4a2", + "#832db6", + "#a96b59", + "#e76300", + "#b9ac70", + "#717581", + "#92dadd", + ] + self.markers = [ + "o", # circle + "s", # square + "^", # triangle up + "D", # diamond + "v", # triangle down + "p", # pentagon + "*", # star + "h", # hexagon + "<", # triangle left + ">", # triangle right + ] + self.processors = ( + processors if processors is not None else multiprocessing.cpu_count() + ) + + def root_to_numpy(self, hist): + """ + Convert ROOT histogram to numpy arrays. + + Args: + hist: ROOT histogram + + Returns: + tuple: (bin_centers, bin_contents, bin_errors, bin_edges, bin_labels, has_labels) + """ + n_bins = hist.GetNbinsX() + bin_edges = np.array([hist.GetBinLowEdge(i) for i in range(1, n_bins + 2)]) + bin_centers = np.array([hist.GetBinCenter(i) for i in range(1, n_bins + 1)]) + + bin_contents = np.array([hist.GetBinContent(i) for i in range(1, n_bins + 1)]) + bin_errors = np.array([hist.GetBinError(i) for i in range(1, n_bins + 1)]) + + bin_labels = [] + for i in range(1, n_bins + 1): + label = hist.GetXaxis().GetBinLabel(i) + bin_labels.append(self._clean_bin_label(label) if label else str(i)) + + # Only use extracted labels if meaningful + has_labels = any(label and not label.isdigit() for label in bin_labels) + + return bin_centers, bin_contents, bin_errors, bin_edges, bin_labels, has_labels + + def _clean_bin_label(self, label): + """Clean up bin label for better readability.""" + return label.replace("TrackSelectionHighPurity", "HP") + + def _apply_axis_scaling(self, ax, xlabel, ylabel, title, logy=False): + """Apply log scaling to axes based on content and user settings.""" + if logy: + y_min, y_max = ax.get_ylim() + if y_max <= 0: + output_file = getattr(self, '_current_output_path', 'unknown') + print(f"\nWarning: Cannot set log scale for plot '{title}' (output: {output_file}) - no positive y-values (y_max: {y_max})") + else: + ax.set_yscale("log") + + # Automatic log scale for specific variable types + if "p_{\mathrm{T}}" in xlabel and "pull" not in xlabel.lower(): + ax.set_xscale("log") + xlabel = r"$p_{\mathrm{T}}$ [GeV]" + + if ("p_{\mathrm{T}}" in ylabel and "pull" not in ylabel.lower()) or "Sigma" in title: + y_min, y_max = ax.get_ylim() + if y_max <= 0: + output_file = getattr(self, '_current_output_path', 'unknown') + print(f"\nWarning: Cannot set log scale for plot '{title}' (output: {output_file}) - no positive y-values (y_max: {y_max})") + else: + ax.set_yscale("log") + + if "vertpos" in xlabel: + ax.set_xscale("log") + xlabel = "vert r [cm]" + + return xlabel, ylabel + + def _plot_histogram_data(self, ax, bin_centers, bin_contents, bin_errors, bin_edges, + label, color_idx, plot_histograms): + """Plot histogram data either as histogram or error bars.""" + if plot_histograms: + hep.histplot( + bin_contents, + bins=bin_edges, + yerr=bin_errors, + label=label, + color=self.colors[color_idx % len(self.colors)], + histtype="step", + linewidth=2, + ax=ax, + ) + else: + ax.errorbar( + bin_centers, + bin_contents, + yerr=bin_errors, + label=label, + color=self.colors[color_idx % len(self.colors)], + fmt=self.markers[color_idx % len(self.markers)], + markersize=5, + capsize=2, + linewidth=1.5, + ) + + def _calculate_and_plot_ratio(self, ax_ratio, bin_centers, bin_contents, bin_errors, + ref_centers, ref_contents, ref_errors, color_idx, plot_histograms): + """Calculate and plot ratio between current and reference histogram.""" + if ax_ratio is None: + return [] + + tolerance = 1e-6 + matching_indices = [] + + for idx, center in enumerate(bin_centers): + ref_idx = np.argmin(np.abs(ref_centers - center)) + if np.abs(ref_centers[ref_idx] - center) < tolerance: + matching_indices.append((idx, ref_idx)) + + if not matching_indices: + return [] + + curr_idxs, ref_idxs = zip(*matching_indices) + matching_centers = bin_centers[list(curr_idxs)] + + matching_ref_contents = ref_contents[list(ref_idxs)] + matching_contents = bin_contents[list(curr_idxs)] + matching_ref_errors = ref_errors[list(ref_idxs)] + matching_errors = bin_errors[list(curr_idxs)] + + ratio = np.divide( + matching_contents, + matching_ref_contents, + out=np.zeros_like(matching_contents), + where=matching_ref_contents != 0, + ) + + ratio_errors = np.zeros_like(ratio) + mask = (matching_ref_contents != 0) & (matching_contents != 0) + ratio_errors[mask] = np.abs(ratio[mask]) * np.sqrt( + np.power(matching_errors[mask] / np.maximum(matching_contents[mask], 1e-10), 2) + + np.power(matching_ref_errors[mask] / np.maximum(matching_ref_contents[mask], 1e-10), 2) + ) + ratio_errors = np.nan_to_num(ratio_errors, nan=0.0, posinf=0.0, neginf=0.0) + + if plot_histograms and len(matching_centers) > 1: + bin_widths = np.diff(np.sort(matching_centers)) + avg_width = np.mean(bin_widths) + matching_edges = np.concatenate([ + [matching_centers[0] - avg_width / 2], + matching_centers + avg_width / 2, + ]) + hep.histplot( + ratio, bins=matching_edges, yerr=ratio_errors, + color=self.colors[color_idx % len(self.colors)], + histtype="step", linewidth=2, ax=ax_ratio, + ) + else: + ax_ratio.errorbar( + matching_centers, ratio, yerr=ratio_errors, + color=self.colors[color_idx % len(self.colors)], + fmt=self.markers[color_idx % len(self.markers)], + markersize=5, capsize=2, linewidth=1.5, + ) + + return ratio[(ratio > 0) & np.isfinite(ratio)] + + def _wrap_legend_labels(self, labels, width=25): + """Soft-wrap legend labels at natural break points to reduce horizontal size.""" + wrapped = [] + for lab in labels: + # Split using / or _ + parts = re.split(r'(/|_)+', lab) + tokens = [] + buffer = "" + for p in parts: + if not p: + continue + candidate = (buffer + p) if buffer else p + if len(candidate) > width and buffer: + tokens.append(buffer.rstrip("_/")) + buffer = p + else: + buffer = candidate + if buffer: + tokens.append(buffer.rstrip("_/")) + + # CamelCase and digit splitting + final_tokens = [] + for tok in tokens: + if len(tok) > width: + subtoks = re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?![a-z])|\d+', tok) + line = "" + for st in subtoks: + if len(line) + len(st) + 1 > width and line: + final_tokens.append(line) + line = st + else: + line = (line + st) if not line else (line + st) + if line: + final_tokens.append(line) + else: + final_tokens.append(tok) + wrapped_label = "\n".join(final_tokens) if final_tokens else lab + wrapped.append(wrapped_label) + return wrapped + + def _configure_legend(self, ax, labels, legend_title, logy=False): + """Configure legend; wrap long entries and move outside if needed.""" + if not labels: + return + + wrapped_labels = self._wrap_legend_labels(labels) + max_label_length = max(len(l.replace("\n", "")) for l in wrapped_labels) + + place_outside = ( + max_label_length > 30 + or (len(wrapped_labels) > 8 and max_label_length > 20) + ) + + legend_columns = 1 if len(wrapped_labels) <= 2 else 2 + legend_fontsize = "small" + if len(wrapped_labels) > 6: + legend_fontsize = "x-small" + if len(wrapped_labels) > 10: + legend_fontsize = "xx-small" + + legend_title_fontsize = "small" + if legend_title and len(legend_title) > 30: + legend_title_fontsize = "x-small" + + + if place_outside: + y_min, y_max = ax.get_ylim() + y_range = y_max - y_min + ax.set_ylim(y_min, y_max + y_range * 0.1) + ax.figure.subplots_adjust(right=0.9) + ax.legend( + wrapped_labels, + loc="upper left", + bbox_to_anchor=(1.01, 1.0), + borderaxespad=0.0, + title=legend_title, + fontsize=legend_fontsize, + title_fontsize=legend_title_fontsize, + frameon=False, + ) + return + + legend_padding = 0.5 if len(wrapped_labels) <= 2 else np.ceil(len(wrapped_labels) / 2) * 0.25 + legend_padding = min(legend_padding, 2.0) + if logy or ax.get_yscale() == "log": + legend_padding *= 50 + if any("\n" in l for l in wrapped_labels): + legend_padding *= 2 + y_min, y_max = ax.get_ylim() + y_range = y_max - y_min + ax.set_ylim(y_min, y_max + y_range * legend_padding) + + ax.legend( + wrapped_labels, + loc="upper right", + ncols=legend_columns, + title=legend_title, + fontsize=legend_fontsize, + title_fontsize=legend_title_fontsize, + columnspacing=1.0, + ) + + def _apply_custom_formatter(self, ax): + """Apply custom scientific notation formatter to y-axis.""" + if ax.get_yscale() != "log": + from matplotlib.ticker import ScalarFormatter + + class CustomScalarFormatter(ScalarFormatter): + def format_data_short(self, value): + if self.orderOfMagnitude != 0: + return f"×10$^{{{self.orderOfMagnitude}}}$" + return "" + + formatter = CustomScalarFormatter(useOffset=True, useMathText=True) + ax.yaxis.set_major_formatter(formatter) + + # Move y-axis scientific notation to avoid overlap with CMS label + ax.yaxis.get_offset_text().set_position((-0.01, 1.02)) + ax.yaxis.get_offset_text().set_horizontalalignment("right") + ax.yaxis.get_offset_text().set_verticalalignment("bottom") + + def _load_histograms(self, file_paths, hist_path, labels): + """Load histograms from files, returning list of histograms and valid labels.""" + histograms = [] + valid_labels = [] + + for file_path, label in zip(file_paths, labels): + try: + root_file = ROOT.TFile.Open(file_path, "READ") + if not root_file or root_file.IsZombie(): + print(f"Warning: Could not open {file_path}") + continue + + hist = root_file.Get(hist_path) + if not hist: + root_file.Close() + continue + + # Clone histogram to avoid issues when file is closed + hist_clone = hist.Clone(f"hist_{len(histograms)}") + hist_clone.SetDirectory(0) + histograms.append(hist_clone) + valid_labels.append(label) + + root_file.Close() + + except Exception as e: + print(f"Error processing {file_path}: {e}") + continue + + return histograms, valid_labels + + def extract_labels_from_hist(self, hist): + """ + Extract title and axis labels from ROOT histogram. + + Args: + hist: ROOT histogram + + Returns: + tuple: (title, xlabel, ylabel) + """ + title = hist.GetTitle() + xlabel = title + ylabel = "Occurrences" + + if " vs " in title: + if "#sigma(" in title.lower(): + parts = title.split(" vs ", 1) + ylabel = parts[0].strip()[parts[0].find('(') + 1 : (parts[0].rfind(')'))] + ylabel = r"$\delta$" + ylabel + "/" + ylabel + if "Mean" in title: + ylabel = "<" + ylabel + ">" + parts[1] = parts[1].replace("Mean", "") + elif "Sigma" in title: + ylabel = r"$\sigma$(" + ylabel + ")" + parts[1] = parts[1].replace("Sigma", "") + xlabel = parts[1].strip() + elif "Mean" in title: + title_clean = title.replace("Mean", "").strip() + parts = title_clean.split(" vs ", 1) + ylabel = "<" + parts[0].strip() + ">" + xlabel = parts[1].strip() + elif "Sigma" in title: + title_clean = title.replace("Sigma", "").strip() + parts = title_clean.split(" vs ", 1) + ylabel = r"$\sigma$" + "<" + parts[0].strip() + ">" + xlabel = parts[1].strip() + else: + # Format: "ylabel vs xlabel" + parts = title.split(" vs ", 1) + ylabel = parts[0].strip() + xlabel = parts[1].strip() + + # For profile histograms, replace mean with <...> + if hist.InheritsFrom("TProfile"): + ylabel = ylabel.replace("mean ", "<") + ">" + else: + # Pull plots + if "pull" not in title.lower(): + if "eta" in title.lower(): + xlabel = r"$\eta$" + elif "pt" in title.lower(): + xlabel = r"$p_{\mathrm{T}}$" + elif "phi" in title.lower(): + xlabel = r"$\phi$" + # Efficiency and turn-on plots + if "eff" in title.lower(): + ylabel = "Efficiency" + elif "turn-on" in title.lower(): + ylabel = "Turn-On" + + return ( + self.convert_root_to_latex(title), + self.convert_root_to_latex(xlabel), + self.convert_root_to_latex(ylabel), + ) + + def convert_root_to_latex(self, text): + """ + Convert ROOT-style notation to matplotlib mathtext format. + + Args: + text: String with ROOT notation + + Returns: + String with mathtext notation + """ + conversions = [ + ("#eta", r"$\eta$"), + ("#phi", r"$\phi$"), + ("#theta", r"$\theta$"), + ("#mu", r"$\mu$"), + ("#pi", r"$\pi$"), + ("#alpha", r"$\alpha$"), + ("#beta", r"$\beta$"), + ("#gamma", r"$\gamma$"), + ("#delta", r"$\delta$"), + ("#Delta", r"$\Delta$"), + ("#sigma", r"$\sigma$"), + ("d_{xy}", r"$d_{xy}$"), + ("d_{z}", r"$d_{z}$"), + ("#chi", r"$\chi$"), + ("#nu", r"$\nu$"), + ("#tau", r"$\tau$"), + ("p_{T}", r"$p_{\mathrm{T}}$"), + ("p_{t}", r"$p_{\mathrm{T}}$"), + ("E_{T}", r"$E_{\mathrm{T}}$"), + ("pT", r"$p_{\mathrm{T}}$"), + ("ET", r"$E_T$"), + ("GeV/c^{2}", r"GeV/$c^2$"), + ("GeV/c", r"GeV/$c$"), + ("^{2}", r"$^2$"), + ("number of", "#"), + ("Number of", "#"), + ("N of", "#"), + ("n of", "#"), + ] + + result = text + for root_notation, mathtext_notation in conversions: + result = result.replace(root_notation, mathtext_notation) + + return result + + def find_histograms_in_file(self, root_file, pattern): + """ + Find all histogram paths matching a regex pattern in a ROOT file. + + Args: + root_file: Open ROOT file + pattern: Regex pattern to match histogram paths + + Returns: + list: List of histogram paths that match the pattern + """ + + def traverse_directory(directory, current_path=""): + """Recursively traverse ROOT file directory structure.""" + hist_paths = [] + for key in directory.GetListOfKeys(): + obj_name = key.GetName() + obj_path = f"{current_path}/{obj_name}" if current_path else obj_name + + obj = key.ReadObj() + if obj.InheritsFrom("TDirectory"): + hist_paths.extend(traverse_directory(obj, obj_path)) + # Skip 2D histograms + elif obj.InheritsFrom("TH1") and not obj.InheritsFrom("TH2"): + if re.search(pattern, obj_path): + hist_paths.append(obj_path) + + return hist_paths + + return traverse_directory(root_file) + + def plot_comparison( + self, + histograms, + labels, + output_path, + logy=False, + normalize=False, + cms_text="Work in Progress", + energy_text=None, + show_energy=False, + data=False, + grid=False, + plot_histograms=False, + do_ratio=True, + save_as_pdf=False, + ): + """ + Create comparison plot with ratio panel. + + Args: + histograms: List of ROOT histograms to compare + labels: List of labels for each histogram + output_path: Output file path + logy: Use log scale for y-axis + normalize: Normalize histograms to unit area + cms_text: CMS label text + energy_text: Custom energy text (if None, uses default) + show_energy: Whether to show energy text + grid: Whether to show grid on both main and ratio plots + plot_histograms: Whether to plot histograms instead of individual bin points with errors + pdf: Whether to save as PDF in addition to PNG + """ + if len(histograms) != len(labels): + raise ValueError("Number of histograms must match number of labels") + + title, xlabel, ylabel = self.extract_labels_from_hist(histograms[0]) + + legend_title = None + if hasattr(self, "_current_output_path"): + output_parts = self._current_output_path.replace("\\", "/").split("/") + if len(output_parts) > 1: + # Get the parent directory name (second to last part) + legend_title = output_parts[-2] if len(output_parts) >= 2 else None + # Ignore legend name for summary plots + if "global" in output_parts[-1].lower() or "coll" in output_parts[-1].lower(): + legend_title = None + + fig = plt.figure(figsize=self.figsize) + gs = gridspec.GridSpec( + 2, 1, height_ratios=[1 - self.ratio_height, self.ratio_height], hspace=0.10 + ) + + ax_main = fig.add_subplot(gs[0]) + + # CMS styling + if show_energy: + if energy_text is None: + # Default energy text (13 TeV) + hep.cms.label(cms_text, data=data, ax=ax_main) + else: + hep.cms.label(cms_text, data=data, ax=ax_main, rlabel=energy_text) + else: + hep.cms.label(cms_text, data=data, ax=ax_main, rlabel="") + + ax_ratio = None + if do_ratio and len(histograms) > 1: + ax_ratio = fig.add_subplot(gs[1], sharex=ax_main) + + ref_centers = None + ref_contents = None + ref_errors = None + has_labels = False + bin_labels = None + + all_ratios = [] + ref_centers = ref_contents = ref_errors = None + + for i, (hist, label) in enumerate(zip(histograms, labels)): + bin_centers, bin_contents, bin_errors, bin_edges, bin_labels, has_labels = ( + self.root_to_numpy(hist) + ) + + if normalize and np.sum(bin_contents) > 0: + norm_factor = 1.0 / np.sum(bin_contents) + bin_contents *= norm_factor + bin_errors *= norm_factor + + # Use first histogram as reference + if i == 0: + ref_centers, ref_contents, ref_errors = bin_centers, bin_contents, bin_errors + + self._plot_histogram_data(ax_main, bin_centers, bin_contents, bin_errors, + bin_edges, label, i, plot_histograms) + + if i > 0: + valid_ratios = self._calculate_and_plot_ratio( + ax_ratio, bin_centers, bin_contents, bin_errors, + ref_centers, ref_contents, ref_errors, i, plot_histograms + ) + all_ratios.extend(valid_ratios) + + # Styling for main plot + # Calculate visible label length by removing LaTeX notation for sizing + visible_ylabel = re.sub(r"\$[^$]*\$", "X", ylabel) + label_size = "medium" if len(visible_ylabel) < 20 else "small" + if title: + ax_main.set_title(title, pad=50) + + xlabel, ylabel = self._apply_axis_scaling(ax_main, xlabel, ylabel, title, logy) + + if not has_labels: + ax_main.set_xlabel(xlabel) + + # Set x-ticks for collection plots to follow track collection names + if "collection" in title.lower(): + ax_main.set_xticks( + ref_centers, + bin_labels, + size="small", + rotation=45, + ha="right", + va="top", + ) + + ax_main.set_ylabel(ylabel, fontsize=label_size) + + self._configure_legend(ax_main, labels, legend_title, logy) + + if grid: + ax_main.grid(True, alpha=0.75, linestyle="dashdot", linewidth=0.75) + + self._apply_custom_formatter(ax_main) + + # Ratio plot styling + if ax_ratio is not None: + # Only show label in ratio and keep the same ticks as main plot + ax_main.set_xlabel("") + ax_main.tick_params(axis="x", labelbottom=False) + + # Set ratio plot limits + if len(all_ratios) > 0: + # Use percentiles to avoid extreme outliers + ratio_min = np.percentile(all_ratios, 5) + ratio_max = np.percentile(all_ratios, 95) + + # Set range around 1 in case of small fluctuations + ratio_min = min(ratio_min, 0.95) + ratio_max = max(ratio_max, 1.05) + + # Add at least 15% padding + ratio_range = ratio_max - ratio_min + padding = max(0.15, ratio_range * 0.1) + + # Clamp limits between 0.1 and 5.0 + final_min = max(0.1, ratio_min - padding) + final_max = min(5.0, ratio_max + padding) + + ax_ratio.set_ylim(final_min, final_max) + else: + ax_ratio.set_ylim(0.5, 1.5) + + if not has_labels: + ax_ratio.set_xlabel(xlabel) + + if "collection" in title.lower(): + ax_ratio.set_xticks( + ref_centers, + bin_labels, + size="small", + rotation=45, + ha="right", + va="top", + ) + + ax_ratio.set_ylabel("Ratio") + ax_ratio.axhline(y=1, color="black", linestyle="--", alpha=0.7) + + if grid: + ax_ratio.grid(True, alpha=0.75, linestyle="dashdot", linewidth=0.75) + + os.makedirs(os.path.dirname(output_path), exist_ok=True) + plt.savefig(output_path, dpi=300, bbox_inches="tight") + + if save_as_pdf: + pdf_path = output_path.rsplit(".", 1)[0] + ".pdf" + plt.savefig(pdf_path, dpi=300, bbox_inches="tight") + plt.close() + + def compare_files( + self, + file_paths, + hist_pattern, + labels=None, + output_dir="plots", + check_n_files=True, + create_web_index=False, + **plot_kwargs, + ): + """ + Compare histograms matching a pattern from multiple files. + + Args: + file_paths: List of ROOT file paths + hist_pattern: Regex pattern to match histogram paths + labels: Labels for each file (uses filename if None) + output_dir: Output directory for plots + check_n_files: Whether to enforce the 10-file limit + create_web_index: Whether to create index.php files for web viewing + **plot_kwargs: Additional arguments for plot_comparison + """ + if check_n_files: + if len(file_paths) > 10: + raise ValueError( + f"Default color palette supports a maximum of 10 files, got {len(file_paths)}. " + "Please reduce the number of input files or use the option --more-files if you are really sure of what you are doing." + ) + + if labels is None: + labels = [] + for f in file_paths: + legend_label = Path(f).stem + if legend_label.startswith("DQM_"): + legend_label = legend_label.replace("DQM_", "") + labels.append(legend_label) + elif isinstance(labels, str): + # Get legend entries from comma-separated string + labels = [label.strip() for label in labels.split(",")] + + if len(labels) != len(file_paths): + raise ValueError( + f"Number of legend entries ({len(labels)}) must match number of files ({len(file_paths)})" + ) + + all_matching_hists = set() + + print("Scanning all files for histograms matching the pattern...") + for i, file_path in enumerate(file_paths): + try: + root_file = ROOT.TFile.Open(file_path, "READ") + if not root_file or root_file.IsZombie(): + print(f"Error: Could not open {file_path}") + continue + + file_hists = self.find_histograms_in_file(root_file, hist_pattern) + all_matching_hists.update(file_hists) + root_file.Close() + + print(f"Found {len(file_hists)} matching histograms in {file_path}") + + except Exception as e: + print(f"Error scanning {file_path}: {e}") + + if not all_matching_hists: + print(f"No histograms matching pattern '{hist_pattern}' found in any file") + return + + print( + f"Total unique histograms found across all files: {len(all_matching_hists)}" + ) + matching_hists = sorted(list(all_matching_hists)) + + plot_tasks = [] + for hist_path in matching_hists: + # Generate output path preserving folder structure based on regex match + output_path = self._generate_output_path( + hist_path, hist_pattern, output_dir + ) + + plot_tasks.append( + { + "file_paths": file_paths, + "hist_path": hist_path, + "labels": labels, + "output_path": output_path, + "plot_kwargs": plot_kwargs, + } + ) + + self._compare_files(plot_tasks) + + # Create web index files if requested + if create_web_index: + print("Adding php index files for web viewing...") + self._create_web_index_files(output_dir) + + def _generate_output_path(self, hist_path, pattern, output_dir): + """ + Generate output path preserving folder structure from regex match. + + Args: + hist_path: Full histogram path from ROOT file + pattern: Regex pattern used to find the histogram + output_dir: Base output directory + + Returns: + Output file path with preserved folder structure + """ + match = re.search(pattern, hist_path) + if match: + match_end = match.end() + remaining_path = hist_path[match_end:].lstrip("/") + + if remaining_path: + path_parts = remaining_path.split("/") + + if len(path_parts) > 1: + # Create subdirectories and filename + subdirs = "/".join(path_parts[:-1]) + filename = path_parts[-1] + output_path = os.path.join(output_dir, subdirs, f"{filename}") + else: + # Single file, no subdirectories + filename = path_parts[0] + output_path = os.path.join(output_dir, f"{filename}") + else: + # If no remaining path, use the last part of the matched path + matched_part = match.group(0) + filename = matched_part.split("/")[-1] + output_path = os.path.join(output_dir, f"{filename}") + else: + hist_name = hist_path.replace("/", "_").replace("\\", "_") + output_path = os.path.join(output_dir, f"{hist_name}") + + return output_path + + def _create_web_index_files(self, output_dir): + """Create index.php files in each subdirectory for web viewing.""" + index_php_url = "https://cernbox.cern.ch/remote.php/dav/public-files/XCmC5GCFnF7Aqfd/index.php" + + try: + with urllib.request.urlopen(index_php_url) as response: + index_php_content = response.read().decode("utf-8") + except urllib.error.URLError as e: + print(f"Warning: Could not download index.php from CERNBox: {e}") + print("Skipping web index creation.") + return + + n_index = 0 + for root, dirs, files in os.walk(output_dir): + index_php_path = os.path.join(root, "index.php") + with open(index_php_path, "w", encoding="utf-8") as f: + f.write(index_php_content) + n_index += 1 + print(f"Created index.php files in {n_index} directories for web viewing") + + def _compare_files(self, plot_tasks): + """Process plotting tasks using multiprocessing.""" + print(f"Using {self.processors} parallel processes") + + with multiprocessing.Pool(self.processors) as pool: + results = [] + for i, task in enumerate(plot_tasks): + task_copy = task.copy() + task_copy.pop("progress_counter", None) + task_copy.pop("total_tasks", None) + result = pool.apply_async(process_histogram_task, (task_copy,)) + results.append((i, result)) + + completed = 0 + failed = 0 + for i, result in results: + try: + result.get(timeout=300) + completed += 1 + if completed % 10 == 0 or completed == len(plot_tasks): + print( + f"\rProgress: {completed}/{len(plot_tasks)} plots completed", + end="", + flush=True, + ) + except multiprocessing.TimeoutError: + print(f"\nTask {i} timed out") + failed += 1 + except Exception as e: + print(f"\nError in task {i}: {e}") + failed += 1 + + pool.close() + pool.join() + + if failed > 0: + print(f"\n{failed} tasks failed out of {len(plot_tasks)} total") + print() + + def _process_collection_histogram( + self, file_paths, hist_path, labels, output_path, plot_kwargs + ): + """Process a collection comparison histogram with unified track collections.""" + self._current_output_path = output_path + all_collections = set() + file_histograms = {} + + for file_path, label in zip(file_paths, labels): + try: + root_file = ROOT.TFile.Open(file_path, "READ") + if not root_file or root_file.IsZombie(): + print(f"Warning: Could not open {file_path}") + continue + + hist = root_file.Get(hist_path) + if not hist: + print(f"Warning: Histogram {hist_path} not found in {file_path}") + root_file.Close() + continue + + # Extract bin labels (track collections) + n_bins = hist.GetNbinsX() + collections = [] + for i in range(1, n_bins + 1): + label_text = hist.GetXaxis().GetBinLabel(i) + if label_text: + collections.append(label_text) + all_collections.add(label_text) + + # Store histogram data + hist_clone = hist.Clone(f"hist_{label}") + hist_clone.SetDirectory(0) + file_histograms[label] = { + "histogram": hist_clone, + "collections": collections, + } + + root_file.Close() + + except Exception as e: + print(f"Error processing {file_path}: {e}") + continue + + if not file_histograms: + print(f"No valid histograms found for {hist_path}!") + return + + # Sort collections for consistent ordering + all_collections = sorted(list(all_collections)) + + # Create unified histograms for each file + unified_histograms = [] + valid_labels = [] + + first_hist = next(iter(file_histograms.values()))["histogram"] + title, xlabel, ylabel = self.extract_labels_from_hist(first_hist) + + for label in labels: + if label not in file_histograms: + continue + + original_hist = file_histograms[label]["histogram"] + file_collections = file_histograms[label]["collections"] + + # Create new histogram with all collections + unified_hist = ROOT.TH1D( + f"unified_{label}", + original_hist.GetTitle(), + len(all_collections), + 0, + len(all_collections), + ) + unified_hist.SetDirectory(0) + + # Set bin labels for all collections + for i, collection in enumerate(all_collections): + unified_hist.GetXaxis().SetBinLabel(i + 1, collection) + + for i, collection in enumerate(file_collections): + if collection in all_collections: + unified_bin = all_collections.index(collection) + 1 + original_bin = i + 1 + + content = original_hist.GetBinContent(original_bin) + error = original_hist.GetBinError(original_bin) + + unified_hist.SetBinContent(unified_bin, content) + unified_hist.SetBinError(unified_bin, error) + + unified_histograms.append(unified_hist) + valid_labels.append(label) + + if len(unified_histograms) == 0: + print(f"No valid unified histograms created for {hist_path}!") + return + + self.plot_comparison( + unified_histograms, valid_labels, output_path, **plot_kwargs + ) + +def process_histogram_task(task): + """Process a single histogram task in a separate process.""" + try: + file_paths = task["file_paths"] + hist_path = task["hist_path"] + labels = task["labels"] + output_path = task["output_path"] + plot_kwargs = task["plot_kwargs"] + + plotter = DQMPlotter() + plotter._current_output_path = output_path + + if "_coll" in hist_path.lower(): + plotter._process_collection_histogram( + file_paths, hist_path, labels, output_path, plot_kwargs + ) + return + + histograms, valid_labels = plotter._load_histograms(file_paths, hist_path, labels) + + if len(histograms) > 0: + plotter.plot_comparison(histograms, valid_labels, output_path, **plot_kwargs) + + except Exception as e: + print(f"Error in process_histogram_task: {e}") + raise + + +if __name__ == "__main__": + """Command line interface for DQM plotter.""" + parser = argparse.ArgumentParser( + description="Compare DQM histograms with CMS styling" + ) + parser.add_argument("files", nargs="+", help="ROOT files to compare") + parser.add_argument( + "-s", + "--source", + required=True, + help="Histogram path or regex pattern to match histograms", + ) + parser.add_argument( + "-l", "--legend", help="Comma-separated list of legend entries for each file" + ) + parser.add_argument("-o", "--output-dir", default="plots", help="Output directory") + parser.add_argument("--logy", action="store_true", help="Use log scale for y-axis") + parser.add_argument("--normalize", action="store_true", help="Normalize histograms") + parser.add_argument( + "--cms-text", + default="Work in progress", + help="CMS label text (e.g. Preliminary, Work in progress)", + ) + parser.add_argument( + "--energy-text", + default=None, + help="Custom energy text in the top right corner (e.g., '14 TeV', 'Run 3')", + ) + parser.add_argument( + "--data", action="store_true", help="Use CMS datastyle for plots" + ) + parser.add_argument( + "--no-energy", action="store_true", help="Don't show energy text" + ) + parser.add_argument("--no-ratio", action="store_true", help="Disable ratio plots") + parser.add_argument( + "--no-grid", action="store_true", help="Remove grid from all plots" + ) + parser.add_argument( + "--histograms", + action="store_true", + help="Plot histograms instead of points (default: points with error bars)", + ) + parser.add_argument( + "--pdf", action="store_true", help="Also save plots in PDF format" + ) + parser.add_argument( + "--more-files", + action="store_true", + help="Allow more than 10 input files (legend might overlap with the plots, color palette only supports 10 files)", + ) + parser.add_argument( + "-n", + "--nProcessors", + type=int, + default=None, + help="Number of parallel processes to use (default: auto-detect)", + ) + parser.add_argument( + "--web", + action="store_true", + help="Create index.php files for web viewing (downloads from CernBox)", + ) + + args = parser.parse_args() + + plotter = DQMPlotter(processors=args.nProcessors) + + plotter.compare_files( + file_paths=args.files, + hist_pattern=args.source, + labels=args.legend, + output_dir=args.output_dir, + logy=args.logy, + normalize=args.normalize, + cms_text=args.cms_text, + energy_text=args.energy_text, + show_energy=not args.no_energy or args.energy_text is not None, + data=args.data, + do_ratio=not args.no_ratio, + grid=not args.no_grid, + plot_histograms=args.histograms, + save_as_pdf=args.pdf, + check_n_files=not args.more_files, + create_web_index=args.web, + ) From 212e3399f714c4b5d37f4b3b1b837995794ca2d0 Mon Sep 17 00:00:00 2001 From: Luca Ferragina Date: Tue, 14 Oct 2025 11:09:58 +0200 Subject: [PATCH 2/5] Multiple fixes for the plotter - Process multiple input regex patterns - Better handling of axes and titles labels - Introduced color palette extension - Safer handling of automatic log-scale plots --- DQMServices/Components/scripts/dqm-plot | 315 ++++++++++-------------- 1 file changed, 136 insertions(+), 179 deletions(-) diff --git a/DQMServices/Components/scripts/dqm-plot b/DQMServices/Components/scripts/dqm-plot index e27afe48d76fa..2df161b9a5c09 100755 --- a/DQMServices/Components/scripts/dqm-plot +++ b/DQMServices/Components/scripts/dqm-plot @@ -20,6 +20,7 @@ import warnings import urllib.request import urllib.error from pathlib import Path +from matplotlib import colors as _mcolors # Suppress numpy warnings about subnormal float values warnings.filterwarnings("ignore", category=UserWarning, module="numpy") @@ -72,6 +73,25 @@ class DQMPlotter: processors if processors is not None else multiprocessing.cpu_count() ) + def _extend_color_palette(self, needed: int): + """Ensure self.colors has at least 'needed' distinct entries.""" + if needed <= len(self.colors): + return + + extra_needed = needed - len(self.colors) + new_colors = [] + + cmap = plt.colormaps["hsv"] + for i in range(max(extra_needed, 1)): + rgba = cmap(i / max(extra_needed, 1)) + hexcol = _mcolors.to_hex(rgba, keep_alpha=False) + if hexcol not in self.colors and hexcol not in new_colors: + new_colors.append(hexcol) + if len(new_colors) >= extra_needed: + break + + self.colors.extend(new_colors) + def root_to_numpy(self, hist): """ Convert ROOT histogram to numpy arrays. @@ -92,7 +112,7 @@ class DQMPlotter: bin_labels = [] for i in range(1, n_bins + 1): label = hist.GetXaxis().GetBinLabel(i) - bin_labels.append(self._clean_bin_label(label) if label else str(i)) + bin_labels.append(self._clean_bin_label(label) if label else "") # Only use extracted labels if meaningful has_labels = any(label and not label.isdigit() for label in bin_labels) @@ -103,27 +123,21 @@ class DQMPlotter: """Clean up bin label for better readability.""" return label.replace("TrackSelectionHighPurity", "HP") - def _apply_axis_scaling(self, ax, xlabel, ylabel, title, logy=False): + def _apply_axis_scaling(self, ax, xlabel, ylabel, title, logy=False, has_negative_values=False): """Apply log scaling to axes based on content and user settings.""" if logy: - y_min, y_max = ax.get_ylim() - if y_max <= 0: + if has_negative_values: output_file = getattr(self, '_current_output_path', 'unknown') - print(f"\nWarning: Cannot set log scale for plot '{title}' (output: {output_file}) - no positive y-values (y_max: {y_max})") + print(f"\nWarning: Cannot set log scale for plot '{title}' (output: {output_file}) - plot contains negative y-values") else: ax.set_yscale("log") # Automatic log scale for specific variable types if "p_{\mathrm{T}}" in xlabel and "pull" not in xlabel.lower(): ax.set_xscale("log") - xlabel = r"$p_{\mathrm{T}}$ [GeV]" if ("p_{\mathrm{T}}" in ylabel and "pull" not in ylabel.lower()) or "Sigma" in title: - y_min, y_max = ax.get_ylim() - if y_max <= 0: - output_file = getattr(self, '_current_output_path', 'unknown') - print(f"\nWarning: Cannot set log scale for plot '{title}' (output: {output_file}) - no positive y-values (y_max: {y_max})") - else: + if not has_negative_values: ax.set_yscale("log") if "vertpos" in xlabel: @@ -285,7 +299,6 @@ class DQMPlotter: if legend_title and len(legend_title) > 30: legend_title_fontsize = "x-small" - if place_outside: y_min, y_max = ax.get_ylim() y_range = y_max - y_min @@ -387,42 +400,48 @@ class DQMPlotter: xlabel = title ylabel = "Occurrences" - if " vs " in title: + vs_regex = re.compile(r'[_\s]+vs[_\s]+', re.IGNORECASE) + if vs_regex.search(title): + used_underscore_delim = "_vs_" in title.lower() + parts = vs_regex.split(title, maxsplit=1) + left, right = parts[0].strip(), parts[1].strip() + + if used_underscore_delim: + left = left.replace("_", " ") + right = right.replace("_", " ") + if "#sigma(" in title.lower(): - parts = title.split(" vs ", 1) - ylabel = parts[0].strip()[parts[0].find('(') + 1 : (parts[0].rfind(')'))] - ylabel = r"$\delta$" + ylabel + "/" + ylabel + core = left[left.find("(") + 1 : left.rfind(")")] + ylabel = r"$\delta$" + core + "/" + core + right_clean = right if "Mean" in title: ylabel = "<" + ylabel + ">" - parts[1] = parts[1].replace("Mean", "") + right_clean = right_clean.replace("Mean", "") elif "Sigma" in title: ylabel = r"$\sigma$(" + ylabel + ")" - parts[1] = parts[1].replace("Sigma", "") - xlabel = parts[1].strip() + right_clean = right_clean.replace("Sigma", "") + xlabel = right_clean.strip() elif "Mean" in title: - title_clean = title.replace("Mean", "").strip() - parts = title_clean.split(" vs ", 1) - ylabel = "<" + parts[0].strip() + ">" - xlabel = parts[1].strip() + right_clean = right.replace("Mean", "").strip() + ylabel = "<" + left + ">" + xlabel = right_clean elif "Sigma" in title: - title_clean = title.replace("Sigma", "").strip() - parts = title_clean.split(" vs ", 1) - ylabel = r"$\sigma$" + "<" + parts[0].strip() + ">" - xlabel = parts[1].strip() + right_clean = right.replace("Sigma", "").strip() + ylabel = r"$\sigma$<" + left + ">" + xlabel = right_clean else: - # Format: "ylabel vs xlabel" - parts = title.split(" vs ", 1) - ylabel = parts[0].strip() - xlabel = parts[1].strip() - - # For profile histograms, replace mean with <...> - if hist.InheritsFrom("TProfile"): + # Default: "ylabel vs xlabel" + ylabel = left + xlabel = right + if hist.InheritsFrom("TProfile") and "mean " in ylabel: ylabel = ylabel.replace("mean ", "<") + ">" else: # Pull plots if "pull" not in title.lower(): if "eta" in title.lower(): xlabel = r"$\eta$" + elif "pt2" in title.lower(): + xlabel = r"$p_{\mathrm{T}}^2$" elif "pt" in title.lower(): xlabel = r"$p_{\mathrm{T}}$" elif "phi" in title.lower(): @@ -453,8 +472,6 @@ class DQMPlotter: ("#eta", r"$\eta$"), ("#phi", r"$\phi$"), ("#theta", r"$\theta$"), - ("#mu", r"$\mu$"), - ("#pi", r"$\pi$"), ("#alpha", r"$\alpha$"), ("#beta", r"$\beta$"), ("#gamma", r"$\gamma$"), @@ -466,24 +483,47 @@ class DQMPlotter: ("#chi", r"$\chi$"), ("#nu", r"$\nu$"), ("#tau", r"$\tau$"), + ("pt2", r"$p_{\mathrm{T}}^2$"), + ("Pt2", r"$p_{\mathrm{T}}^2$"), + ("pT2", r"$p_{\mathrm{T}}^2$"), ("p_{T}", r"$p_{\mathrm{T}}$"), ("p_{t}", r"$p_{\mathrm{T}}$"), - ("E_{T}", r"$E_{\mathrm{T}}$"), ("pT", r"$p_{\mathrm{T}}$"), - ("ET", r"$E_T$"), + ("pt", r"$p_{\mathrm{T}}$"), ("GeV/c^{2}", r"GeV/$c^2$"), ("GeV/c", r"GeV/$c$"), ("^{2}", r"$^2$"), ("number of", "#"), ("Number of", "#"), - ("N of", "#"), - ("n of", "#"), ] result = text for root_notation, mathtext_notation in conversions: result = result.replace(root_notation, mathtext_notation) + # Replace standalone tokens without # + token_map = { + "eta": r"$\eta$", + "phi": r"$\phi$", + "theta": r"$\theta$", + "alpha": r"$\alpha$", + "beta": r"$\beta$", + "gamma": r"$\gamma$", + "delta": r"$\delta$", + "sigma": r"$\sigma$", + "chi": r"$\chi$" + } + # Tokens not preceded by letter/digit/_ or backslash and not followed by letter/digit/_. + pattern = re.compile( + r'(? 10: raise ValueError( f"Default color palette supports a maximum of 10 files, got {len(file_paths)}. " "Please reduce the number of input files or use the option --more-files if you are really sure of what you are doing." ) + # Normalize patterns to a list + patterns = ( + list(hist_pattern) + if isinstance(hist_pattern, (list, tuple, set)) + else [hist_pattern] + ) + if labels is None: labels = [] for f in file_paths: @@ -742,7 +792,6 @@ class DQMPlotter: legend_label = legend_label.replace("DQM_", "") labels.append(legend_label) elif isinstance(labels, str): - # Get legend entries from comma-separated string labels = [label.strip() for label in labels.split(",")] if len(labels) != len(file_paths): @@ -751,8 +800,9 @@ class DQMPlotter: ) all_matching_hists = set() + hist_to_pattern = {} # remember which pattern matched each histogram - print("Scanning all files for histograms matching the pattern...") + print("Scanning all files for histograms matching the pattern(s)...") for i, file_path in enumerate(file_paths): try: root_file = ROOT.TFile.Open(file_path, "READ") @@ -760,17 +810,24 @@ class DQMPlotter: print(f"Error: Could not open {file_path}") continue - file_hists = self.find_histograms_in_file(root_file, hist_pattern) - all_matching_hists.update(file_hists) - root_file.Close() + total_for_file = 0 + for pat in patterns: + file_hists = self.find_histograms_in_file(root_file, pat) + total_for_file += len(file_hists) + all_matching_hists.update(file_hists) + # Set first matching pattern for each hist if not already set + for hp in file_hists: + hist_to_pattern.setdefault(hp, pat) - print(f"Found {len(file_hists)} matching histograms in {file_path}") + root_file.Close() + print(f"Found {total_for_file} matching histograms in {file_path}") except Exception as e: print(f"Error scanning {file_path}: {e}") if not all_matching_hists: - print(f"No histograms matching pattern '{hist_pattern}' found in any file") + joined = ", ".join(map(str, patterns)) + print(f"No histograms matching pattern(s) '{joined}' found in any file") return print( @@ -780,9 +837,10 @@ class DQMPlotter: plot_tasks = [] for hist_path in matching_hists: - # Generate output path preserving folder structure based on regex match + # Use the specific pattern that matched this histogram for path generation + matched_pat = hist_to_pattern.get(hist_path, patterns[0]) output_path = self._generate_output_path( - hist_path, hist_pattern, output_dir + hist_path, matched_pat, output_dir ) plot_tasks.append( @@ -790,14 +848,14 @@ class DQMPlotter: "file_paths": file_paths, "hist_path": hist_path, "labels": labels, + "colors": self.colors, "output_path": output_path, "plot_kwargs": plot_kwargs, } ) - self._compare_files(plot_tasks) + self._process_files(plot_tasks) - # Create web index files if requested if create_web_index: print("Adding php index files for web viewing...") self._create_web_index_files(output_dir) @@ -862,7 +920,7 @@ class DQMPlotter: n_index += 1 print(f"Created index.php files in {n_index} directories for web viewing") - def _compare_files(self, plot_tasks): + def _process_files(self, plot_tasks): """Process plotting tasks using multiprocessing.""" print(f"Using {self.processors} parallel processes") @@ -901,107 +959,6 @@ class DQMPlotter: print(f"\n{failed} tasks failed out of {len(plot_tasks)} total") print() - def _process_collection_histogram( - self, file_paths, hist_path, labels, output_path, plot_kwargs - ): - """Process a collection comparison histogram with unified track collections.""" - self._current_output_path = output_path - all_collections = set() - file_histograms = {} - - for file_path, label in zip(file_paths, labels): - try: - root_file = ROOT.TFile.Open(file_path, "READ") - if not root_file or root_file.IsZombie(): - print(f"Warning: Could not open {file_path}") - continue - - hist = root_file.Get(hist_path) - if not hist: - print(f"Warning: Histogram {hist_path} not found in {file_path}") - root_file.Close() - continue - - # Extract bin labels (track collections) - n_bins = hist.GetNbinsX() - collections = [] - for i in range(1, n_bins + 1): - label_text = hist.GetXaxis().GetBinLabel(i) - if label_text: - collections.append(label_text) - all_collections.add(label_text) - - # Store histogram data - hist_clone = hist.Clone(f"hist_{label}") - hist_clone.SetDirectory(0) - file_histograms[label] = { - "histogram": hist_clone, - "collections": collections, - } - - root_file.Close() - - except Exception as e: - print(f"Error processing {file_path}: {e}") - continue - - if not file_histograms: - print(f"No valid histograms found for {hist_path}!") - return - - # Sort collections for consistent ordering - all_collections = sorted(list(all_collections)) - - # Create unified histograms for each file - unified_histograms = [] - valid_labels = [] - - first_hist = next(iter(file_histograms.values()))["histogram"] - title, xlabel, ylabel = self.extract_labels_from_hist(first_hist) - - for label in labels: - if label not in file_histograms: - continue - - original_hist = file_histograms[label]["histogram"] - file_collections = file_histograms[label]["collections"] - - # Create new histogram with all collections - unified_hist = ROOT.TH1D( - f"unified_{label}", - original_hist.GetTitle(), - len(all_collections), - 0, - len(all_collections), - ) - unified_hist.SetDirectory(0) - - # Set bin labels for all collections - for i, collection in enumerate(all_collections): - unified_hist.GetXaxis().SetBinLabel(i + 1, collection) - - for i, collection in enumerate(file_collections): - if collection in all_collections: - unified_bin = all_collections.index(collection) + 1 - original_bin = i + 1 - - content = original_hist.GetBinContent(original_bin) - error = original_hist.GetBinError(original_bin) - - unified_hist.SetBinContent(unified_bin, content) - unified_hist.SetBinError(unified_bin, error) - - unified_histograms.append(unified_hist) - valid_labels.append(label) - - if len(unified_histograms) == 0: - print(f"No valid unified histograms created for {hist_path}!") - return - - self.plot_comparison( - unified_histograms, valid_labels, output_path, **plot_kwargs - ) - def process_histogram_task(task): """Process a single histogram task in a separate process.""" try: @@ -1013,12 +970,7 @@ def process_histogram_task(task): plotter = DQMPlotter() plotter._current_output_path = output_path - - if "_coll" in hist_path.lower(): - plotter._process_collection_histogram( - file_paths, hist_path, labels, output_path, plot_kwargs - ) - return + plotter.colors = task["colors"] histograms, valid_labels = plotter._load_histograms(file_paths, hist_path, labels) @@ -1029,7 +981,6 @@ def process_histogram_task(task): print(f"Error in process_histogram_task: {e}") raise - if __name__ == "__main__": """Command line interface for DQM plotter.""" parser = argparse.ArgumentParser( @@ -1040,7 +991,10 @@ if __name__ == "__main__": "-s", "--source", required=True, - help="Histogram path or regex pattern to match histograms", + nargs="+", + action="append", + help="Histogram path or regex pattern(s) to match. " + "Use multiple -s or space-separated values to provide several patterns.", ) parser.add_argument( "-l", "--legend", help="Comma-separated list of legend entries for each file" @@ -1098,9 +1052,12 @@ if __name__ == "__main__": plotter = DQMPlotter(processors=args.nProcessors) + # Flatten list of patterns + source_patterns = [p for group in args.source for p in group] if isinstance(args.source, list) else [args.source] + plotter.compare_files( file_paths=args.files, - hist_pattern=args.source, + hist_pattern=source_patterns, labels=args.legend, output_dir=args.output_dir, logy=args.logy, @@ -1113,6 +1070,6 @@ if __name__ == "__main__": grid=not args.no_grid, plot_histograms=args.histograms, save_as_pdf=args.pdf, - check_n_files=not args.more_files, + more_files=args.more_files, create_web_index=args.web, ) From 360c5cf3cfe7d5c97cd39913bf396ce7c3d04cb6 Mon Sep 17 00:00:00 2001 From: Luca Ferragina Date: Thu, 30 Oct 2025 10:10:22 +0100 Subject: [PATCH 3/5] More flexible legend placing and line wrapping Improved "vs" matching --- DQMServices/Components/scripts/dqm-plot | 49 +++++++++++++++++-------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/DQMServices/Components/scripts/dqm-plot b/DQMServices/Components/scripts/dqm-plot index 2df161b9a5c09..bba7cdba27c9a 100755 --- a/DQMServices/Components/scripts/dqm-plot +++ b/DQMServices/Components/scripts/dqm-plot @@ -239,7 +239,13 @@ class DQMPlotter: """Soft-wrap legend labels at natural break points to reduce horizontal size.""" wrapped = [] for lab in labels: - # Split using / or _ + # Explicit new lines + if "\\n" in lab: + explicit_lines = lab.split("\\n") + wrapped.append("\n".join(explicit_lines)) + continue + + # Automatic split using / or _ parts = re.split(r'(/|_)+', lab) tokens = [] buffer = "" @@ -275,7 +281,7 @@ class DQMPlotter: wrapped.append(wrapped_label) return wrapped - def _configure_legend(self, ax, labels, legend_title, logy=False): + def _configure_legend(self, ax, labels, legend_title, logy=False, force_outside=False): """Configure legend; wrap long entries and move outside if needed.""" if not labels: return @@ -284,20 +290,21 @@ class DQMPlotter: max_label_length = max(len(l.replace("\n", "")) for l in wrapped_labels) place_outside = ( - max_label_length > 30 + force_outside + or max_label_length > 30 or (len(wrapped_labels) > 8 and max_label_length > 20) ) legend_columns = 1 if len(wrapped_labels) <= 2 else 2 - legend_fontsize = "small" + legend_fontsize = "22" if len(wrapped_labels) > 6: - legend_fontsize = "x-small" + legend_fontsize = "20" if len(wrapped_labels) > 10: - legend_fontsize = "xx-small" + legend_fontsize = "18" - legend_title_fontsize = "small" + legend_title_fontsize = "24" if legend_title and len(legend_title) > 30: - legend_title_fontsize = "x-small" + legend_title_fontsize = "20" if place_outside: y_min, y_max = ax.get_ylim() @@ -400,9 +407,11 @@ class DQMPlotter: xlabel = title ylabel = "Occurrences" - vs_regex = re.compile(r'[_\s]+vs[_\s]+', re.IGNORECASE) + # Match "vs", "vs.", and flexible spacing/periods between v and s; allow underscores or spaces as delimiters + vs_regex = re.compile(r'[_\s]+v\s*\.?\s*s\s*\.?[_\s]+', re.IGNORECASE) if vs_regex.search(title): - used_underscore_delim = "_vs_" in title.lower() + # Detect explicit underscore-delimited form even with optional dots/spaces + used_underscore_delim = bool(re.search(r'_v\s*\.?\s*s\s*\.?_', title.lower())) parts = vs_regex.split(title, maxsplit=1) left, right = parts[0].strip(), parts[1].strip() @@ -564,7 +573,7 @@ class DQMPlotter: output_path, logy=False, normalize=False, - cms_text="Work in Progress", + cms_text="Preliminary", energy_text=None, show_energy=False, data=False, @@ -572,6 +581,7 @@ class DQMPlotter: plot_histograms=False, do_ratio=True, save_as_pdf=False, + legend_outside=False, ): """ Create comparison plot with ratio panel. @@ -588,6 +598,7 @@ class DQMPlotter: grid: Whether to show grid on both main and ratio plots plot_histograms: Whether to plot histograms instead of individual bin points with errors pdf: Whether to save as PDF in addition to PNG + legend_outside: Force legend to be placed outside the plot area """ if len(histograms) != len(labels): raise ValueError("Number of histograms must match number of labels") @@ -665,7 +676,7 @@ class DQMPlotter: label_size = "medium" if len(visible_ylabel) < 20 else "small" if title: ax_main.set_title(title, pad=50) - has_negative_values = np.any(bin_contents <= 0) + has_negative_values = np.any(bin_contents < 0) xlabel, ylabel = self._apply_axis_scaling(ax_main, xlabel, ylabel, title, logy, has_negative_values) # Set custom labels if available @@ -684,7 +695,7 @@ class DQMPlotter: ax_main.set_ylabel(ylabel, fontsize=label_size) - self._configure_legend(ax_main, labels, legend_title, logy) + self._configure_legend(ax_main, labels, legend_title, logy, force_outside=legend_outside) if grid: ax_main.grid(True, alpha=0.75, linestyle="dashdot", linewidth=0.75) @@ -800,7 +811,7 @@ class DQMPlotter: ) all_matching_hists = set() - hist_to_pattern = {} # remember which pattern matched each histogram + hist_to_pattern = {} print("Scanning all files for histograms matching the pattern(s)...") for i, file_path in enumerate(file_paths): @@ -997,14 +1008,14 @@ if __name__ == "__main__": "Use multiple -s or space-separated values to provide several patterns.", ) parser.add_argument( - "-l", "--legend", help="Comma-separated list of legend entries for each file" + "-l", "--legend", help="Comma-separated list of legend entries for each file. Use \\n for explicit line breaks." ) parser.add_argument("-o", "--output-dir", default="plots", help="Output directory") parser.add_argument("--logy", action="store_true", help="Use log scale for y-axis") parser.add_argument("--normalize", action="store_true", help="Normalize histograms") parser.add_argument( "--cms-text", - default="Work in progress", + default="Preliminary", help="CMS label text (e.g. Preliminary, Work in progress)", ) parser.add_argument( @@ -1047,6 +1058,11 @@ if __name__ == "__main__": action="store_true", help="Create index.php files for web viewing (downloads from CernBox)", ) + parser.add_argument( + "--legend-outside", + action="store_true", + help="Force legend to be placed outside the plot area", + ) args = parser.parse_args() @@ -1072,4 +1088,5 @@ if __name__ == "__main__": save_as_pdf=args.pdf, more_files=args.more_files, create_web_index=args.web, + legend_outside=args.legend_outside, ) From 4c51998ed85928f4f88cd7d90224bcbca242c771 Mon Sep 17 00:00:00 2001 From: Luca Ferragina Date: Mon, 3 Nov 2025 14:40:47 +0100 Subject: [PATCH 4/5] Add option to overlay different collections --- DQMServices/Components/scripts/dqm-plot | 278 ++++++++++++++++++++++-- 1 file changed, 259 insertions(+), 19 deletions(-) diff --git a/DQMServices/Components/scripts/dqm-plot b/DQMServices/Components/scripts/dqm-plot index bba7cdba27c9a..61631ba41551f 100755 --- a/DQMServices/Components/scripts/dqm-plot +++ b/DQMServices/Components/scripts/dqm-plot @@ -245,6 +245,19 @@ class DQMPlotter: wrapped.append("\n".join(explicit_lines)) continue + # Preserve " - " separator (for overlay labels like "File - Collection") + if " - " in lab: + parts = lab.split(" - ", 1) # Split only on first occurrence + file_part = parts[0] + collection_part = parts[1] if len(parts) > 1 else "" + + # If the combined length is too long, put on separate lines + if len(lab) > width: + wrapped.append(f"{file_part}\n- {collection_part}") + else: + wrapped.append(lab) + continue + # Automatic split using / or _ parts = re.split(r'(/|_)+', lab) tokens = [] @@ -566,6 +579,111 @@ class DQMPlotter: return traverse_directory(root_file) + def _parse_overlay_groups(self, overlay_specs): + """ + Parse overlay specifications into groups. + + Args: + overlay_specs: List of colon-separated pattern strings + + Returns: + List of lists, where each inner list contains patterns to overlay + """ + if not overlay_specs: + return [] + + groups = [] + for spec in overlay_specs: + patterns = [p.strip() for p in spec.split(":")] + if len(patterns) > 1: + groups.append(patterns) + + return groups + + def _normalize_path_for_overlay(self, hist_path, overlay_patterns): + """ + Normalize histogram path by removing overlay pattern components. + + Args: + hist_path: Full histogram path + overlay_patterns: List of patterns that should be removed for grouping + + Returns: + Normalized path (or None if path doesn't match any pattern) + """ + for pattern in overlay_patterns: + if re.search(pattern, hist_path): + # Remove the matching pattern part + normalized = re.sub(pattern, "OVERLAY_PLACEHOLDER", hist_path) + return normalized, pattern + + return None, None + + def _group_histograms_by_overlay(self, all_hists, overlay_groups): + """ + Group histograms that should be overlaid together. + + Args: + all_hists: Set of all histogram paths + overlay_groups: List of overlay pattern groups + + Returns: + Dictionary mapping normalized paths to lists of (hist_path, pattern) tuples + """ + grouped = {} + used_hists = set() + + for overlay_patterns in overlay_groups: + # Find all histograms matching any pattern in this overlay group + for hist_path in all_hists: + normalized, matched_pattern = self._normalize_path_for_overlay( + hist_path, overlay_patterns + ) + + if normalized: + if normalized not in grouped: + grouped[normalized] = [] + grouped[normalized].append((hist_path, matched_pattern)) + used_hists.add(hist_path) + + # Filter groups to only include those with multiple histograms + filtered_groups = { + norm_path: hists + for norm_path, hists in grouped.items() + if len(hists) > 1 + } + + return filtered_groups, used_hists + + def _generate_overlay_output_path(self, normalized_path, matched_patterns, + pattern, output_dir): + """ + Generate output path for overlaid histograms. + + Args: + normalized_path: Normalized path with placeholder + matched_patterns: List of patterns that were matched + pattern: Original search pattern + output_dir: Base output directory + + Returns: + Output file path + """ + # Create combined collection name from patterns + collection_names = [] + for pat in matched_patterns: + # Extract meaningful name from pattern (remove regex special chars) + name = re.sub(r'[^a-zA-Z0-9_]', '', pat) + if name: + collection_names.append(name) + + combined_name = "+".join(sorted(set(collection_names))) + + # Replace placeholder with combined name + overlay_path = normalized_path.replace("OVERLAY_PLACEHOLDER", combined_name) + + return self._generate_output_path(overlay_path, pattern, output_dir) + def plot_comparison( self, histograms, @@ -582,6 +700,7 @@ class DQMPlotter: do_ratio=True, save_as_pdf=False, legend_outside=False, + overlay_groups=None, ): """ Create comparison plot with ratio panel. @@ -599,6 +718,7 @@ class DQMPlotter: plot_histograms: Whether to plot histograms instead of individual bin points with errors pdf: Whether to save as PDF in addition to PNG legend_outside: Force legend to be placed outside the plot area + overlay_groups: List of pattern groups to overlay """ if len(histograms) != len(labels): raise ValueError("Number of histograms must match number of labels") @@ -614,6 +734,9 @@ class DQMPlotter: # Ignore legend name for summary plots if "global" in output_parts[-1].lower() or "coll" in output_parts[-1].lower(): legend_title = None + # Split long overlay titles at + marker + elif legend_title and "+" in legend_title and len(legend_title) > 30: + legend_title = legend_title.replace("+", "\n+") fig = plt.figure(figsize=self.figsize) gs = gridspec.GridSpec( @@ -765,6 +888,8 @@ class DQMPlotter: output_dir="plots", more_files=False, create_web_index=False, + overlay_groups=None, + overlay_individual=True, **plot_kwargs, ): """ @@ -777,6 +902,8 @@ class DQMPlotter: output_dir: Output directory for plots more_files: Color palette will be extended to support more than 10 files if True create_web_index: Whether to create index.php files for web viewing + overlay_groups: List of pattern groups to overlay + overlay_individual: Whether to also create individual plots for overlaid collections **plot_kwargs: Additional arguments for plot_comparison """ if more_files: @@ -826,7 +953,6 @@ class DQMPlotter: file_hists = self.find_histograms_in_file(root_file, pat) total_for_file += len(file_hists) all_matching_hists.update(file_hists) - # Set first matching pattern for each hist if not already set for hp in file_hists: hist_to_pattern.setdefault(hp, pat) @@ -844,26 +970,103 @@ class DQMPlotter: print( f"Total unique histograms found across all files: {len(all_matching_hists)}" ) - matching_hists = sorted(list(all_matching_hists)) plot_tasks = [] - for hist_path in matching_hists: - # Use the specific pattern that matched this histogram for path generation - matched_pat = hist_to_pattern.get(hist_path, patterns[0]) - output_path = self._generate_output_path( - hist_path, matched_pat, output_dir - ) - - plot_tasks.append( - { - "file_paths": file_paths, - "hist_path": hist_path, - "labels": labels, - "colors": self.colors, - "output_path": output_path, - "plot_kwargs": plot_kwargs, - } + + # Handle overlay groups if specified + if overlay_groups: + grouped_hists, used_hists = self._group_histograms_by_overlay( + all_matching_hists, overlay_groups ) + + if grouped_hists: + print(f"Found {len(grouped_hists)} histogram groups to overlay") + + # Create tasks for overlaid histograms + for normalized_path, hist_pattern_pairs in grouped_hists.items(): + # Sort by pattern to ensure consistent ordering + hist_pattern_pairs = sorted(hist_pattern_pairs, key=lambda x: x[1]) + hist_paths = [hp for hp, _ in hist_pattern_pairs] + matched_patterns = [pat for _, pat in hist_pattern_pairs] + + # Use first pattern for output path generation + base_pattern = hist_to_pattern.get(hist_paths[0], patterns[0]) + output_path = self._generate_overlay_output_path( + normalized_path, matched_patterns, base_pattern, output_dir + ) + + # For single file: overlay collections within the file + if len(file_paths) == 1: + # Load all histograms from different collections + overlay_labels = [] + for hist_path, pattern in hist_pattern_pairs: + # Extract collection name from path + collection = re.search(pattern, hist_path) + collection_name = collection.group(0) if collection else pattern + overlay_labels.append(f"{labels[0]} - {collection_name}") + + plot_tasks.append({ + "file_paths": file_paths * len(hist_paths), + "hist_path": hist_paths, + "labels": overlay_labels, + "colors": self.colors, + "output_path": output_path, + "plot_kwargs": plot_kwargs, + "is_overlay": True, + }) + else: + # For multiple files: overlay collections across files + # Create consistent ordering: all collections for file1, then all for file2, etc. + overlay_labels = [] + extended_file_paths = [] + extended_hist_paths = [] + + for file_idx, (file_path, file_label) in enumerate(zip(file_paths, labels)): + for hist_path, pattern in hist_pattern_pairs: + collection = re.search(pattern, hist_path) + collection_name = collection.group(0) if collection else pattern + overlay_labels.append(f"{file_label} - {collection_name}") + extended_file_paths.append(file_path) + extended_hist_paths.append(hist_path) + + plot_tasks.append({ + "file_paths": extended_file_paths, + "hist_path": extended_hist_paths, + "labels": overlay_labels, + "colors": self.colors, + "output_path": output_path, + "plot_kwargs": plot_kwargs, + "is_overlay": True, + }) + + # Process non-overlaid histograms separately + if overlay_individual: + # Include individual plots for overlaid collections + remaining_hists = all_matching_hists + if remaining_hists: + print(f"Processing {len(remaining_hists)} histograms (including individual overlay collections)") + else: + # Exclude overlaid histograms from individual processing + remaining_hists = all_matching_hists - used_hists + if remaining_hists: + print(f"Processing {len(remaining_hists)} non-overlaid histograms separately") + else: + remaining_hists = all_matching_hists + + # Create tasks for non-overlaid histograms (standard behavior) + for hist_path in sorted(remaining_hists): + matched_pat = hist_to_pattern.get(hist_path, patterns[0]) + output_path = self._generate_output_path(hist_path, matched_pat, output_dir) + + plot_tasks.append({ + "file_paths": file_paths, + "hist_path": hist_path, + "labels": labels, + "colors": self.colors, + "output_path": output_path, + "plot_kwargs": plot_kwargs, + "is_overlay": False, + }) self._process_files(plot_tasks) @@ -978,12 +1181,32 @@ def process_histogram_task(task): labels = task["labels"] output_path = task["output_path"] plot_kwargs = task["plot_kwargs"] + is_overlay = task["is_overlay"] plotter = DQMPlotter() plotter._current_output_path = output_path plotter.colors = task["colors"] - histograms, valid_labels = plotter._load_histograms(file_paths, hist_path, labels) + if is_overlay: + # hist_path is a list of paths for overlay mode + histograms = [] + valid_labels = [] + + if isinstance(hist_path, list): + # Process in order to maintain consistent color/marker assignment + for fp, hp, lab in zip(file_paths, hist_path, labels): + hists, vlabs = plotter._load_histograms([fp], hp, [lab]) + histograms.extend(hists) + valid_labels.extend(vlabs) + else: + histograms, valid_labels = plotter._load_histograms( + file_paths, hist_path, labels + ) + else: + # Keep user-specified order for files + histograms, valid_labels = plotter._load_histograms( + file_paths, hist_path, labels + ) if len(histograms) > 0: plotter.plot_comparison(histograms, valid_labels, output_path, **plot_kwargs) @@ -1063,6 +1286,18 @@ if __name__ == "__main__": action="store_true", help="Force legend to be placed outside the plot area", ) + parser.add_argument( + "--overlay", + action="append", + help="Overlay histograms from different collections. Specify colon-separated patterns " + "(e.g., 'hltPhase2PixelTracks:hltPhase2PixelTracksCAExtension'). " + "Can be used multiple times for different overlay groups.", + ) + parser.add_argument( + "--no-overlay-individual", + action="store_true", + help="Disable creation of individual plots for collections that are overlaid (default: create both overlay and individual plots)", + ) args = parser.parse_args() @@ -1071,6 +1306,9 @@ if __name__ == "__main__": # Flatten list of patterns source_patterns = [p for group in args.source for p in group] if isinstance(args.source, list) else [args.source] + # Parse overlay groups + overlay_groups = plotter._parse_overlay_groups(args.overlay) if args.overlay else None + plotter.compare_files( file_paths=args.files, hist_pattern=source_patterns, @@ -1089,4 +1327,6 @@ if __name__ == "__main__": more_files=args.more_files, create_web_index=args.web, legend_outside=args.legend_outside, + overlay_groups=overlay_groups, + overlay_individual=not args.no_overlay_individual, ) From be6842178dcc27ff7dab0fa31559ee65df762064 Mon Sep 17 00:00:00 2001 From: Luca Ferragina Date: Wed, 14 Jan 2026 19:24:26 +0100 Subject: [PATCH 5/5] Add unit test for dqm-plot --- DQMServices/Components/test/BuildFile.xml | 1 + DQMServices/Components/test/test_dqm-plot.sh | 27 ++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100755 DQMServices/Components/test/test_dqm-plot.sh diff --git a/DQMServices/Components/test/BuildFile.xml b/DQMServices/Components/test/BuildFile.xml index 8ecf362568cf6..dd1f06da85a70 100644 --- a/DQMServices/Components/test/BuildFile.xml +++ b/DQMServices/Components/test/BuildFile.xml @@ -2,4 +2,5 @@ + diff --git a/DQMServices/Components/test/test_dqm-plot.sh b/DQMServices/Components/test/test_dqm-plot.sh new file mode 100755 index 0000000000000..7d998b97fafee --- /dev/null +++ b/DQMServices/Components/test/test_dqm-plot.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +function die { echo $1: status $2; exit $2; } + +REMOTE="/store/group/phys_tracking/cmssw_unittests/" +DQMFILE="DQM_V0001_R000000001__RelValTTbar_14TeV__CMSSW_12_1_0_pre5-121X_mcRun3_2021_realistic_v15-v1__DQMIO.root" +COMMMAND=`xrdfs cms-xrd-global.cern.ch locate ${REMOTE}${DQMFILE}` +STATUS=$? +echo "xrdfs command status = "$STATUS + +if [ $STATUS -eq 0 ]; then + echo "Using file ${DQMFILE}. Running in ${LOCAL_TEST_DIR}." + xrdcp root://cms-xrd-global.cern.ch/${REMOTE}${DQMFILE} . + + (dqm-plot \ + -n 4 \ + --web \ + --pdf \ + -s "DQMData/Run 1/HLT/Run summary/Tracking/ValidationWRTOffline/*" \ + --overlay "hltMergedWrtHighPurity:hltMergedWrtHighPurityPV" \ + --energy-text "TEST" \ + -l "File 1, File 2" \ + ./${DQMFILE} ./${DQMFILE}) || die 'failed running dqm-plot $DQMFILE' $? + rm -fr ./${DQMFILE} +else + die "SKIPPING test, file ${DQMFILE} not found" 0 +fi \ No newline at end of file