diff --git a/comparisons/compare-maxmem-summary.py b/comparisons/compare-maxmem-summary.py index 60340bb40c1b..51583f4c08ad 100755 --- a/comparisons/compare-maxmem-summary.py +++ b/comparisons/compare-maxmem-summary.py @@ -8,9 +8,9 @@ import json import glob import re +import sys -MAXMEM_WARN_THRESHOLD = 1.0 -MAXMEM_ERROR_THRESHOLD = 10.0 +import maxmem_threshold def KILL(message): @@ -50,9 +50,9 @@ def compare_maxmem_summary(**kwargs): nalloc_pr = max_memory_pr_dict[step].get("# allocations calls") ndalloc_pr = max_memory_pr_dict[step].get("# deallocations calls") nlalloc_pr = nalloc_pr - ndalloc_pr if (nalloc_pr and ndalloc_pr) else 0 - max_memory_pr = max_mem_pr / 1000000 if max_mem_pr else 0.0 - req_memory_pr = req_mem_pr / 1000000 if req_mem_pr else 0.0 - leak_memory_pr = leak_mem_pr / 1000000 if leak_mem_pr else 0.0 + max_memory_pr = max_mem_pr / (1024 * 1024) if max_mem_pr else 0.0 + req_memory_pr = req_mem_pr / (1024 * 1024) if req_mem_pr else 0.0 + leak_memory_pr = leak_mem_pr / (1024 * 1024) if leak_mem_pr else 0.0 nallocated_pr = nalloc_pr if nalloc_pr else 0 max_mem_base = max_memory_base_dict[step].get("max memory used") @@ -61,9 +61,9 @@ def compare_maxmem_summary(**kwargs): nalloc_base = max_memory_base_dict[step].get("# allocations calls") ndalloc_base = max_memory_base_dict[step].get("# deallocations calls") nlalloc_base = nalloc_base - ndalloc_base if (nalloc_base and ndalloc_base) else 0 - max_memory_base = max_mem_base / 1000000 if max_mem_base else 0.0 - req_memory_base = req_mem_base / 1000000 if req_mem_base else 0.0 - leak_memory_base = leak_mem_base / 1000000 if leak_mem_base else 0.0 + max_memory_base = max_mem_base / (1024 * 1024) if max_mem_base else 0.0 + req_memory_base = req_mem_base / (1024 * 1024) if req_mem_base else 0.0 + leak_memory_base = leak_mem_base / (1024 * 1024) if leak_mem_base else 0.0 nallocated_base = nalloc_base if nalloc_base else 0 max_mem_pdiff = max_memory_pdiff_dict[step].get("max memory used") @@ -199,36 +199,40 
@@ def stepfn(step): summaryLine += [ '<PR - baseline (MB)>' ] - for step in sorted(workflows[workflow].keys(), key=stepfn): - summaryLine += [ - '', - "{:,.2f}".format(workflows[workflow][step]["max memory adiff"]), - "", - ] - summaryLine += [ - "", - ] - summaryLine += [ - '<100 * (PR - baseline)/baseline >' - ] for step in sorted(workflows[workflow].keys(), key=stepfn): threshold = workflows[workflow][step]["threshold"] if not threshold: - threshold = 1.0 + threshold = maxmem_threshold.WARN_THRESHOLD error_threshold = workflows[workflow][step].get("error_threshold") if not error_threshold: - error_threshold = 10.0 - cellString = ' MAXMEM_WARN_THRESHOLD: + if workflows[workflow][step]["max memory adiff"] > threshold: color = 'bgcolor="orange"' - if abs(workflows[workflow][step]["max memory pdiff"]) > MAXMEM_ERROR_THRESHOLD: + if workflows[workflow][step]["max memory adiff"] > error_threshold: color = 'bgcolor="red"' + if workflows[workflow][step]["max memory adiff"] < -1 * threshold: + color = 'bgcolor="yellow"' + if workflows[workflow][step]["max memory adiff"] < -1 * error_threshold: + color = 'bgcolor="green"' cellString += color cellString += ">" summaryLine += [ cellString, - "{:,.3f}".format(workflows[workflow][step]["max memory pdiff"]), + "{:,.3f}".format(workflows[workflow][step]["max memory adiff"]), + "", + ] + summaryLine += [ + "", + ] + summaryLine += [ + '<100 * (PR - baseline)/baseline >' + ] + for step in sorted(workflows[workflow].keys(), key=stepfn): + summaryLine += [ + '', + "{:,.2f}".format(workflows[workflow][step]["max memory pdiff"]), "%", ] summaryLine += [ @@ -391,9 +395,6 @@ def stepfn(step): "{:,}".format(workflows[workflow][step]["nallocated base"]), "", ] - summaryLine += [ - "", - ] summaryLine += [ '<pull request >' ] @@ -435,10 +436,17 @@ def stepfn(step): if summaryFormat == "html": summaryLines += [ '", + + "default maximum memory used warn threshold %0.0f" % maxmem_threshold.WARN_THRESHOLD + + ' MB
' - + "maximum memory used warn threshold %0.3f" % MAXMEM_WARN_THRESHOLD - + '%
' - + "maximum memory used error threshold %0.3f" % MAXMEM_ERROR_THRESHOLD - + "%
' + + "default maximum memory used error threshold %0.0f" + % maxmem_threshold.ERROR_THRESHOLD + + ' MB
' + + "default maximum memory used warn threshold -1 * %0.0f" + % maxmem_threshold.WARN_THRESHOLD + + ' MB
' + + "default maximum memory used error threshold -1 * %0.0f" + % maxmem_threshold.ERROR_THRESHOLD + + " MB
", ] summaryLines += ["
"] diff --git a/comparisons/compare-maxmem.py b/comparisons/compare-maxmem.py index e7e704f19eb9..5e9b808a941e 100755 --- a/comparisons/compare-maxmem.py +++ b/comparisons/compare-maxmem.py @@ -1,8 +1,11 @@ #!/usr/bin/env python3 +import os import sys import json from collections import defaultdict +import maxmem_threshold + def create_memory_report_dict(filename): memory_reports = dict(dict()) @@ -25,45 +28,54 @@ def create_memory_report_dict(filename): mem_prof_base_dicts = create_memory_report_dict(sys.argv[2]) mem_prof_pdiffs_dicts = dict(dict()) +mem_prof_diffs_dicts = dict(dict()) for k in mem_prof_pr_dicts.keys(): mem_prof_pdiffs_dict = dict() + mem_prof_diffs_dict = dict() mem_prof_pr_subdict = mem_prof_pr_dicts[k] for j, v in mem_prof_pr_subdict.items(): if j == "step": mem_prof_pdiffs_dict[j] = v + mem_prof_diffs_dict[j] = v else: mem_prof_pdiffs_dict[j] = ( 100 * (mem_prof_pr_dicts[k][j] - mem_prof_base_dicts[k][j]) / mem_prof_base_dicts[k][j] ) + mem_prof_diffs_dict[j] = mem_prof_pr_dicts[k][j] - mem_prof_base_dicts[k][j] mem_prof_pdiffs_dicts[k] = mem_prof_pdiffs_dict + mem_prof_diffs_dicts[k] = mem_prof_diffs_dict mem_prof = {} mem_prof["max memory pr"] = mem_prof_pr_dicts mem_prof["max memory base"] = mem_prof_base_dicts mem_prof["max memory pdiffs"] = mem_prof_pdiffs_dicts -WARN_THRESHOLD = 1.0 -ERROR_THRESHOLD = 10.0 -mem_prof["threshold"] = WARN_THRESHOLD -mem_prof["error_threshold"] = ERROR_THRESHOLD +mem_prof["max memory diffs"] = mem_prof_diffs_dicts +mem_prof["threshold"] = maxmem_threshold.WARN_THRESHOLD +mem_prof["error_threshold"] = maxmem_threshold.ERROR_THRESHOLD mem_prof["workflow"] = sys.argv[1].split("/")[-2] sys.stdout.write(json.dumps(mem_prof)) sys.stdout.write("\n") errs = 0 -for k in sorted(mem_prof_pdiffs_dicts.keys()): - mmu = mem_prof_pdiffs_dicts[k].get("max memory used") +for k in sorted(mem_prof_diffs_dicts.keys()): + mmu = mem_prof_diffs_dicts[k].get("max memory used") if mmu: - if abs(mmu) > ERROR_THRESHOLD: + mmus = mmu / 
(1024 * 1024) + if mmus > maxmem_threshold.WARN_THRESHOLD or mmus < -1 * maxmem_threshold.WARN_THRESHOLD: + sys.stderr.write( + "Warning: Workflow %s %s max memory diff %.1f exceeds +/- %.1f MiB\n" + % (mem_prof["workflow"], k, mmus, maxmem_threshold.WARN_THRESHOLD) + ) + if mmus > maxmem_threshold.ERROR_THRESHOLD or mmus < -1 * maxmem_threshold.ERROR_THRESHOLD: errs = errs + 1 sys.stderr.write( - "Workflow %s %s max memory used percentage diff %2f%% exceeds error threshold %2f%%" - % (mem_prof["workflow"], k, abs(mmu), ERROR_THRESHOLD) + "Error: Workflow %s %s max memory diff %.1f exceeds +/- %.1f MiB\n" + % (mem_prof["workflow"], k, mmus, maxmem_threshold.ERROR_THRESHOLD) ) - sys.stderr.write("\n") if errs > 0: exit(10) diff --git a/comparisons/maxmem_threshold.py b/comparisons/maxmem_threshold.py new file mode 100644 index 000000000000..a9fd77dbed87 --- /dev/null +++ b/comparisons/maxmem_threshold.py @@ -0,0 +1,2 @@ +WARN_THRESHOLD = 10.0 +ERROR_THRESHOLD = 80.0 diff --git a/pr_testing/_helper_functions.sh b/pr_testing/_helper_functions.sh index bb44f8c507e6..ff740cd6ed8d 100755 --- a/pr_testing/_helper_functions.sh +++ b/pr_testing/_helper_functions.sh @@ -189,6 +189,14 @@ function get_result_file_name () { echo "21-${TEST_FLAVOR}-comparison-report.res" return 0 ;; + maxmem) + if [ "$TEST_FLAVOR" != "" ]; then + echo "23-${TEST_FLAVOR}-maxmem-report.res" + else + echo "23-maxmem-report.res" + fi + return 0 + ;; esac return 1 } diff --git a/pr_testing/run-pr-comparisons b/pr_testing/run-pr-comparisons index fa3b29145393..ae2e8bcb992b 100755 --- a/pr_testing/run-pr-comparisons +++ b/pr_testing/run-pr-comparisons @@ -294,9 +294,9 @@ set +x # maxmem-profile comparison # -------------------------------------------------------------------------- echo "Started maxmem-profile comparison at `date`" -OUTPUT_DIR=$WORKSPACE/results/maxmem-comparison +MAXMEM_COMPARISON_OUTPUT_DIR=$WORKSPACE/results/maxmem-comparison #create the output dir -mkdir -p $OUTPUT_DIR +mkdir -p 
$MAXMEM_COMPARISON_OUTPUT_DIR for maxmem in $(find $WORKSPACE/data/PR-${PR_NUM} -follow -name 'maxmem_profile_*.txt' -type f | sed "s|$WORKSPACE/data/PR-${PR_NUM}/||") ; do echo "Maxmem Profile> Working on ${maxmem}" if [ ! -e "$WORKSPACE/data/$COMPARISON_RELEASE/${maxmem}" ] ; then @@ -305,13 +305,18 @@ for maxmem in $(find $WORKSPACE/data/PR-${PR_NUM} -follow -name 'maxmem_profile_ fi WF_NUMBER=$(echo ${maxmem} | sed 's|_.*||') $CMS_BOT_DIR/comparisons/compare-maxmem.py $WORKSPACE/data/PR-${PR_NUM}/${maxmem} \ - $WORKSPACE/data/$COMPARISON_RELEASE/${maxmem} > $OUTPUT_DIR/${WF_NUMBER}.json 2> $OUTPUT_DIR/${WF_NUMBER}.err || true + $WORKSPACE/data/$COMPARISON_RELEASE/${maxmem} > $MAXMEM_COMPARISON_OUTPUT_DIR/${WF_NUMBER}.json 2>> $MAXMEM_COMPARISON_OUTPUT_DIR/${WF_NUMBER}.err || true done -$CMS_BOT_DIR/comparisons/compare-maxmem-summary.py -i $OUTPUT_DIR -f '*.json' -F html -o $OUTPUT_DIR/index.html -u $JENKINS_ARTIFACTS_URL/$PR_BASELINE_DIR || true -if grep "exceeds threshold" $OUTPUT_DIR/*.err 2>/dev/null; then - echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,max memory used ${UC_TEST_FLAVOR} comparison,See results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison" >> ${RESULTS_FILE} +mkdir -p $WORKSPACE/testsResults +$CMS_BOT_DIR/comparisons/compare-maxmem-summary.py -i $MAXMEM_COMPARISON_OUTPUT_DIR -f '*.json' -F html -o $MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.html -u $JENKINS_ARTIFACTS_URL/$PR_BASELINE_DIR >$MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log 2>&1 || true +if grep "Error:" $MAXMEM_COMPARISON_OUTPUT_DIR/*.err >$MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log 2>/dev/null; then + echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,${UC_TEST_FLAVOR} max memory used comparison failed,See failed results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison/maxmem_summary.html" >> ${RESULTS_FILE} + REPORT_FILE=$WORKSPACE/testsResults/$(get_result_file_name "maxmem" "${TEST_FLAVOR}" "") + touch $REPORT_FILE + 
${CMS_BOT_DIR}/report-pull-request-results PARSE_MAXMEM_FAIL --no-post --unit-tests-file $MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log --report-file ${REPORT_FILE} --report-url ${PR_RESULT_URL} || true
+  ${CMS_BOT_DIR}/report-pull-request-results PARSE_MAXMEM_FAIL --unit-tests-file $MAXMEM_COMPARISON_OUTPUT_DIR/maxmem_summary.log --report-file ${REPORT_FILE} --report-url ${PR_RESULT_URL} || true
 else
-  echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,max memory used ${UC_TEST_FLAVOR} comparison,See results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison" >> ${RESULTS_FILE}
+  echo "MAXMEM_COMPARISON${TEST_FLAVOR_STR};OK,max memory used ${UC_TEST_FLAVOR} comparison,See results,/SDT/jenkins-artifacts/$COMP_UPLOAD_DIR/maxmem-comparison/maxmem_summary.html" >> ${RESULTS_FILE}
 fi
 
 # --------------------------------------------------------------------------
diff --git a/report-pull-request-results.py b/report-pull-request-results.py
index cddfe139a1ca..17dab45a3aaa 100755
--- a/report-pull-request-results.py
+++ b/report-pull-request-results.py
@@ -26,7 +26,7 @@
     usage="usage: %prog ACTION [options] \n ACTION = PARSE_UNIT_TESTS_FAIL | PARSE_BUILD_FAIL "
     "| PARSE_MATRIX_FAIL | COMPARISON_READY | GET_BASE_MESSAGE | PARSE_EXTERNAL_BUILD_FAIL "
     "| PARSE_ADDON_FAIL | PARSE_CRAB_FAIL | PARSE_CLANG_BUILD_FAIL | MATERIAL_BUDGET "
-    "| PYTHON3_FAIL | PARSE_GPU_UNIT_TESTS_FAIL | MERGE_COMMITS"
+    "| PYTHON3_FAIL | PARSE_GPU_UNIT_TESTS_FAIL | MERGE_COMMITS | PARSE_MAXMEM_FAIL "
 )
 
 parser.add_option(
@@ -294,6 +294,36 @@ def read_material_budget_log_file(unit_tests_file):
     send_message_pr(message)
 
 
+#
+# reads maxmem comparison error files
+#
+def read_maxmem_comparison_file(unit_tests_file):
+    """Post a PR message listing the max-memory comparison errors.
+
+    Scans the lines of ``openlog(unit_tests_file)`` for the word "exceeds"
+    (case-insensitive) and, if any match, sends one message via
+    ``send_message_pr`` with a bullet per matching line.
+    """
+    errors_found = []
+    for line in openlog(unit_tests_file):
+        if "exceeds" not in line.lower():
+            continue
+        # run-pr-comparisons fills this log with `grep "Error:" *.err`, and
+        # grep prefixes every match with "<file>:" when several .err files
+        # exist. Splitting on the "Error:" marker (rather than taking field 1
+        # of a ":"-split) keeps the workflow text in both cases and cannot
+        # raise IndexError on a line without a colon.
+        errors_found.append(" - %s\n" % line.split("Error:", 1)[-1].strip())
+
+    if errors_found:
+        message = (
+            "\n## Max Memory Comparisons exceeding threshold\n\n"
+            "@cms-sw/core-l2 , I found %s workflow step(s) with memory usage exceeding the error threshold:\n\n%s"
+            % (len(errors_found), "".join(errors_found))
+        )
+        send_message_pr(message)
+
+
 def get_recent_merges_message():
     message = ""
     if options.recent_merges_file:
@@ -643,6 +673,8 @@ def complain_missing_param(param_name):
     read_python3_file(options.unit_tests_file)
 elif ACTION == "MATERIAL_BUDGET":
     read_material_budget_log_file(options.unit_tests_file)
+elif ACTION == "PARSE_MAXMEM_FAIL":
+    read_maxmem_comparison_file(options.unit_tests_file)
 elif ACTION == "MERGE_COMMITS":
     add_to_report(get_recent_merges_message())
 elif ACTION == "PARSE_CUDA_UNIT_TESTS_FAIL":