Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 41 additions & 33 deletions comparisons/compare-maxmem-summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
import json
import glob
import re
import sys

MAXMEM_WARN_THRESHOLD = 1.0
MAXMEM_ERROR_THRESHOLD = 10.0
import maxmem_threshold


def KILL(message):
Expand Down Expand Up @@ -50,9 +50,9 @@ def compare_maxmem_summary(**kwargs):
nalloc_pr = max_memory_pr_dict[step].get("# allocations calls")
ndalloc_pr = max_memory_pr_dict[step].get("# deallocations calls")
nlalloc_pr = nalloc_pr - ndalloc_pr if (nalloc_pr and ndalloc_pr) else 0
max_memory_pr = max_mem_pr / 1000000 if max_mem_pr else 0.0
req_memory_pr = req_mem_pr / 1000000 if req_mem_pr else 0.0
leak_memory_pr = leak_mem_pr / 1000000 if leak_mem_pr else 0.0
max_memory_pr = max_mem_pr / (1024 * 1024) if max_mem_pr else 0.0
req_memory_pr = req_mem_pr / (1024 * 1024) if req_mem_pr else 0.0
leak_memory_pr = leak_mem_pr / (1024 * 1024) if leak_mem_pr else 0.0
nallocated_pr = nalloc_pr if nalloc_pr else 0

max_mem_base = max_memory_base_dict[step].get("max memory used")
Expand All @@ -61,9 +61,9 @@ def compare_maxmem_summary(**kwargs):
nalloc_base = max_memory_base_dict[step].get("# allocations calls")
ndalloc_base = max_memory_base_dict[step].get("# deallocations calls")
nlalloc_base = nalloc_base - ndalloc_base if (nalloc_base and ndalloc_base) else 0
max_memory_base = max_mem_base / 1000000 if max_mem_base else 0.0
req_memory_base = req_mem_base / 1000000 if req_mem_base else 0.0
leak_memory_base = leak_mem_base / 1000000 if leak_mem_base else 0.0
max_memory_base = max_mem_base / (1024 * 1024) if max_mem_base else 0.0
req_memory_base = req_mem_base / (1024 * 1024) if req_mem_base else 0.0
leak_memory_base = leak_mem_base / (1024 * 1024) if leak_mem_base else 0.0
nallocated_base = nalloc_base if nalloc_base else 0

max_mem_pdiff = max_memory_pdiff_dict[step].get("max memory used")
Expand Down Expand Up @@ -199,36 +199,40 @@ def stepfn(step):
summaryLine += [
'<tr><td style="border-bottom-style:hidden;border-top-style:hidden;">&lt;PR - baseline (MB)&gt;</td>'
]
for step in sorted(workflows[workflow].keys(), key=stepfn):
summaryLine += [
'<td style="border-bottom-style:hidden;border-top-style:hidden;">',
"{:,.2f}".format(workflows[workflow][step]["max memory adiff"]),
"</td>",
]
summaryLine += [
"</tr>",
]
summaryLine += [
'<tr><td style="border-top-style:hidden">&lt;100 * (PR - baseline)/baseline &gt;</td>'
]
for step in sorted(workflows[workflow].keys(), key=stepfn):
threshold = workflows[workflow][step]["threshold"]
if not threshold:
threshold = 1.0
threshold = maxmem_threshold.WARN_THRESHOLD
error_threshold = workflows[workflow][step].get("error_threshold")
if not error_threshold:
error_threshold = 10.0
cellString = '<td style="border-top-style:hidden" '
error_threshold = maxmem_threshold.ERROR_THRESHOLD
cellString = '<td style="border-bottom-style:hidden;border-top-style:hidden;" '
color = ""
if abs(workflows[workflow][step]["max memory pdiff"]) > MAXMEM_WARN_THRESHOLD:
if workflows[workflow][step]["max memory adiff"] > threshold:
color = 'bgcolor="orange"'
if abs(workflows[workflow][step]["max memory pdiff"]) > MAXMEM_ERROR_THRESHOLD:
if workflows[workflow][step]["max memory adiff"] > error_threshold:
color = 'bgcolor="red"'
if workflows[workflow][step]["max memory adiff"] < -1 * threshold:
color = 'bgcolor="yellow"'
if workflows[workflow][step]["max memory adiff"] < -1 * error_threshold:
color = 'bgcolor="green"'
cellString += color
cellString += ">"
summaryLine += [
cellString,
"{:,.3f}".format(workflows[workflow][step]["max memory pdiff"]),
"{:,.3f}".format(workflows[workflow][step]["max memory adiff"]),
"</td>",
]
summaryLine += [
"</tr>",
]
summaryLine += [
'<tr><td style="border-top-style:hidden">&lt;100 * (PR - baseline)/baseline &gt;</td>'
]
for step in sorted(workflows[workflow].keys(), key=stepfn):
summaryLine += [
'<td style="border-top-style:hidden;">',
"{:,.2f}".format(workflows[workflow][step]["max memory pdiff"]),
"%</td>",
]
summaryLine += [
Expand Down Expand Up @@ -391,9 +395,6 @@ def stepfn(step):
"{:,}".format(workflows[workflow][step]["nallocated base"]),
"</td>",
]
summaryLine += [
"</tr>",
]
summaryLine += [
'<tr><td style="border-bottom-style:hidden;border-top-style:hidden;">&lt;pull request &gt;</td>'
]
Expand Down Expand Up @@ -435,10 +436,17 @@ def stepfn(step):
if summaryFormat == "html":
summaryLines += [
'</table><table><tr><td bgcolor="orange">'
+ "maximum memory used warn threshold %0.3f" % MAXMEM_WARN_THRESHOLD
+ '%</td></tr><tr><td bgcolor="red">'
+ "maximum memory used error threshold %0.3f" % MAXMEM_ERROR_THRESHOLD
+ "%</td></tr>",
+ "default maximum memory used warn threshold %0.0f" % maxmem_threshold.WARN_THRESHOLD
+ ' MB</td></tr><tr><td bgcolor="red">'
+ "default maximum memory used error threshold %0.0f"
% maxmem_threshold.ERROR_THRESHOLD
+ ' MB</td></tr><tr><td bgcolor="yellow">'
+ "default maximum memory used warn threshold -1 * %0.0f"
% maxmem_threshold.WARN_THRESHOLD
+ ' MB</td></tr><tr><td bgcolor="green">'
+ "default maximum memory used error threshold -1 * %0.0f"
% maxmem_threshold.ERROR_THRESHOLD
+ " MB</td></tr></table><table>",
]
summaryLines += ["</table></body></html>"]

Expand Down
32 changes: 22 additions & 10 deletions comparisons/compare-maxmem.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#!/usr/bin/env python3
import os
import sys
import json
from collections import defaultdict

import maxmem_threshold


def create_memory_report_dict(filename):
memory_reports = dict(dict())
Expand All @@ -25,45 +28,54 @@ def create_memory_report_dict(filename):
mem_prof_base_dicts = create_memory_report_dict(sys.argv[2])

mem_prof_pdiffs_dicts = dict(dict())
mem_prof_diffs_dicts = dict(dict())

for k in mem_prof_pr_dicts.keys():
mem_prof_pdiffs_dict = dict()
mem_prof_diffs_dict = dict()
mem_prof_pr_subdict = mem_prof_pr_dicts[k]
for j, v in mem_prof_pr_subdict.items():
if j == "step":
mem_prof_pdiffs_dict[j] = v
mem_prof_diffs_dict[j] = v
else:
mem_prof_pdiffs_dict[j] = (
100
* (mem_prof_pr_dicts[k][j] - mem_prof_base_dicts[k][j])
/ mem_prof_base_dicts[k][j]
)
mem_prof_diffs_dict[j] = mem_prof_pr_dicts[k][j] - mem_prof_base_dicts[k][j]
mem_prof_pdiffs_dicts[k] = mem_prof_pdiffs_dict
mem_prof_diffs_dicts[k] = mem_prof_diffs_dict

mem_prof = {}

mem_prof["max memory pr"] = mem_prof_pr_dicts
mem_prof["max memory base"] = mem_prof_base_dicts
mem_prof["max memory pdiffs"] = mem_prof_pdiffs_dicts
WARN_THRESHOLD = 1.0
ERROR_THRESHOLD = 10.0
mem_prof["threshold"] = WARN_THRESHOLD
mem_prof["error_threshold"] = ERROR_THRESHOLD
mem_prof["max memory diffs"] = mem_prof_diffs_dicts
mem_prof["threshold"] = maxmem_threshold.WARN_THRESHOLD
mem_prof["error_threshold"] = maxmem_threshold.ERROR_THRESHOLD
mem_prof["workflow"] = sys.argv[1].split("/")[-2]
sys.stdout.write(json.dumps(mem_prof))
sys.stdout.write("\n")

errs = 0
for k in sorted(mem_prof_pdiffs_dicts.keys()):
mmu = mem_prof_pdiffs_dicts[k].get("max memory used")
for k in sorted(mem_prof_diffs_dicts.keys()):
mmu = mem_prof_diffs_dicts[k].get("max memory used")
if mmu:
if abs(mmu) > ERROR_THRESHOLD:
mmus = mmu / (1024 * 1024)
if mmus > maxmem_threshold.WARN_THRESHOLD or mmus < -1 * maxmem_threshold.WARN_THRESHOLD:
sys.stderr.write(
"Warning: Workflow %s %s max memory diff %.1f exceeds +/- %.1f MiB\n"
% (mem_prof["workflow"], k, mmus, maxmem_threshold.WARN_THRESHOLD)
)
if mmus > maxmem_threshold.ERROR_THRESHOLD or mmus < -1 * maxmem_threshold.ERROR_THRESHOLD:
errs = errs + 1
sys.stderr.write(
"Workflow %s %s max memory used percentage diff %2f%% exceeds error threshold %2f%%"
% (mem_prof["workflow"], k, abs(mmu), ERROR_THRESHOLD)
"Error: Workflow %s %s max memory diff %.1f exceeds +/- %.1f MiB\n"
% (mem_prof["workflow"], k, mmus, maxmem_threshold.ERROR_THRESHOLD)
)
sys.stderr.write("\n")

if errs > 0:
exit(10)
2 changes: 2 additions & 0 deletions comparisons/maxmem_threshold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Default limits for the max-memory comparison, shared by the comparison
# scripts that import this module.
# compare-maxmem.py divides the "max memory used" difference (PR - baseline)
# by 1024 * 1024 before comparing it against these values, so they are
# presumably MiB -- TODO confirm the unit of the raw profile values.
WARN_THRESHOLD = 10.0  # a difference beyond +/- this value is reported as a warning
ERROR_THRESHOLD = 80.0  # a difference beyond +/- this value is counted as an error
8 changes: 8 additions & 0 deletions pr_testing/_helper_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,14 @@ function get_result_file_name () {
echo "21-${TEST_FLAVOR}-comparison-report.res"
return 0
;;
maxmem)
if [ "$TEST_FLAVOR" != "" ]; then
echo "23-${TEST_FLAVOR}-maxmem-report.res"
else
echo "23-maxmem-report.res"
fi
return 0
;;
esac
return 1
}
Expand Down
19 changes: 12 additions & 7 deletions pr_testing/run-pr-comparisons
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,9 @@ set +x
# maxmem-profile comparison
# --------------------------------------------------------------------------
echo "Started maxmem-profile comparison at `date`"
OUTPUT_DIR=$WORKSPACE/results/maxmem-comparison
MAXMEM_COMPARISON_OUTPUT_DIR=$WORKSPACE/results/maxmem-comparison
#create the output dir
mkdir -p $OUTPUT_DIR
mkdir -p $MAXMEM_COMPARISON_OUTPUT_DIR
for maxmem in $(find $WORKSPACE/data/PR-${PR_NUM} -follow -name 'maxmem_profile_*.txt' -type f | sed "s|$WORKSPACE/data/PR-${PR_NUM}/||") ; do
echo "Maxmem Profile> Working on ${maxmem}"
if [ ! -e "$WORKSPACE/data/$COMPARISON_RELEASE/${maxmem}" ] ; then
Expand All @@ -305,13 +305,18 @@ for maxmem in $(find $WORKSPACE/data/PR-${PR_NUM} -follow -name 'maxmem_profile_
fi
WF_NUMBER=$(echo ${maxmem} | sed 's|_.*||')
$CMS_BOT_DIR/comparisons/compare-maxmem.py $WORKSPACE/data/PR-${PR_NUM}/${maxmem} \
$WORKSPACE/data/$COMPARISON_RELEASE/${maxmem} > $OUTPUT_DIR/${WF_NUMBER}.json 2> $OUTPUT_DIR/${WF_NUMBER}.err || true
$WORKSPACE/data/$COMPARISON_RELEASE/${maxmem} > $MAXMEM_COMPARISON_OUTPUT_DIR/${WF_NUMBER}.json 2>> $MAXMEM_COMPARISON_OUTPUT_DIR/${WF_NUMBER}.err || true
done
$CMS_BOT_DIR/comparisons/compare-maxmem-summary.py -i $OUTPUT_DIR -f '*.json' -F html -o $OUTPUT_DIR/index.html -u $JENKINS_ARTIFACTS_URL/$PR_BASELINE_DIR || true
if grep "exceeds threshold" $OUTPUT_DIR/*.err 2>/dev/null; then
echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,max memory used ${UC_TEST_FLAVOR} comparison,See results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison" >> ${RESULTS_FILE}
mkdir -p $WORKSPACE/testsResults
$CMS_BOT_DIR/comparisons/compare-maxmem-summary.py -i $MAXMEM_COMPARISON_OUTPUT_DIR -f '*.json' -F html -o $MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.html -u $JENKINS_ARTIFACTS_URL/$PR_BASELINE_DIR >$MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log 2>&1 || true
if grep "Error:" $MAXMEM_COMPARISON_OUTPUT_DIR/*.err >$MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log 2>/dev/null; then
echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,${UC_TEST_FLAVOR} max memory used comparison failed,See failed results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison/maxmem_summary.html" >> ${RESULTS_FILE}
REPORT_FILE=$WORKSPACE/testsResults/$(get_result_file_name "maxmem" "${TEST_FLAVOR}" "")
Comment thread
gartung marked this conversation as resolved.
touch $REPORT_FILE
${CMS_BOT_DIR}/report-pull-request-results PARSE_MAXMEM_FAIL --no-post --unit-tests-file $MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log --report-file ${REPORT_FILE} --report-url ${PR_RESULT_URL} || true
${CMS_BOT_DIR}/report-pull-request-results PARSE_MAXMEM_FAIL --unit-tests-file $MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log --report-file ${REPORT_FILE} --report-url ${PR_RESULT_URL} || true
else
echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,max memory used ${UC_TEST_FLAVOR} comparison,See results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison" >> ${RESULTS_FILE}
echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,max memory used ${UC_TEST_FLAVOR} comparison,See results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison/maxmem_summary.html" >> ${RESULTS_FILE}
fi

# --------------------------------------------------------------------------
Expand Down
24 changes: 23 additions & 1 deletion report-pull-request-results.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
usage="usage: %prog ACTION [options] \n ACTION = PARSE_UNIT_TESTS_FAIL | PARSE_BUILD_FAIL "
"| PARSE_MATRIX_FAIL | COMPARISON_READY | GET_BASE_MESSAGE | PARSE_EXTERNAL_BUILD_FAIL "
"| PARSE_ADDON_FAIL | PARSE_CRAB_FAIL | PARSE_CLANG_BUILD_FAIL | MATERIAL_BUDGET "
"| PYTHON3_FAIL | PARSE_GPU_UNIT_TESTS_FAIL | MERGE_COMMITS"
"| PYTHON3_FAIL | PARSE_GPU_UNIT_TESTS_FAIL | MERGE_COMMITS | PARSE_MAXMEM_FAIL "
)

parser.add_option(
Expand Down Expand Up @@ -294,6 +294,26 @@ def read_material_budget_log_file(unit_tests_file):
send_message_pr(message)


#
# reads maxmem comparison error files
#
def read_maxmem_comparison_file(unit_tests_file):
    """Report max-memory comparison failures found in a log file.

    Every line of ``unit_tests_file`` containing "exceeds" (case-insensitive)
    is counted as one failing workflow step. If at least one is found, a
    summary message listing them is passed to ``send_message_pr``.

    Args:
        unit_tests_file: path of the log to scan; it is read via ``openlog``.
    """
    error_tag = "Error:"
    errors_found = []
    for line in openlog(unit_tests_file):
        if "exceeds" not in line.lower():
            continue
        # The log is built by grep-ing several .err files, and grep then
        # prefixes each match with "<file>:". Taking the field after the
        # first colon would return the "Error" tag instead of the message,
        # so key on the tag itself.
        _, tag, tail = line.partition(error_tag)
        if not tag:
            # No tag: keep whatever follows the last colon, or the whole
            # line when there is no colon at all (never raises IndexError).
            tail = line.rpartition(":")[2]
        errors_found.append(" - %s\n" % tail.strip())

    if errors_found:
        message = (
            "\n## Max Memory Comparisons exceeding threshold\n\n"
            "@cms-sw/core-l2 , I found %s workflow step(s) with memory usage exceeding the error threshold:\n\n%s"
            % (len(errors_found), "".join(errors_found))
        )
        send_message_pr(message)


def get_recent_merges_message():
message = ""
if options.recent_merges_file:
Expand Down Expand Up @@ -643,6 +663,8 @@ def complain_missing_param(param_name):
read_python3_file(options.unit_tests_file)
elif ACTION == "MATERIAL_BUDGET":
read_material_budget_log_file(options.unit_tests_file)
elif ACTION == "PARSE_MAXMEM_FAIL":
read_maxmem_comparison_file(options.unit_tests_file)
elif ACTION == "MERGE_COMMITS":
add_to_report(get_recent_merges_message())
elif ACTION == "PARSE_CUDA_UNIT_TESTS_FAIL":
Expand Down