Improve what goes into JUnit XML for reporting with Jenkins (#26771)
This PR improves the way we collect JUnit XML entries, which are responsible
for 1) flagging Jenkins jobs as "unstable" (as opposed to passing) and
2) showing what failed from the web interface. It does so in the following
ways:

1. Hardens the log -> XML extractor against unrelated output. For example,
some prediffs or checks used to print `[Skipping ...`, which the parser
mistook for "skipping a test" when reading the log output. To avoid such
false matches, this PR introduces an allowlist of all possible skip markers.
2. Ensures that `sub_test` crashes are caught. To this end, standardizes the
output `start_test` prints when a `sub_test` invocation fails, and starts
treating "Error running sub_test" as a delimiter that can start or end a
test block (alongside "Starting subtest" and "Finished subtest", neither of
which is necessarily printed if `sub_test` fails). In doing so, begins
detecting failed `sub_test` invocations and including them in the JUnit XML.
3. Adds a new script to generate JUnit XML from failed system commands. To
this end, stores a list of all executed commands, and adjusts `mysystemlog`
to include blank lines for commands that succeeded, keeping the two files
line-aligned (see the sketch after this list).
4. Adds a new script `merge_junit_xmls` that does as its name suggests.
Using this script...
* ...incorporates JUnit output from the Chapel Language Server tests
into the final report.
* ...incorporates errors from system commands (see above) in the final
report.
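
To make the alignment in point 3 concrete, here is a minimal sketch (the log
contents below are hypothetical; the real files are `$mysystemlog.clean` and
`$mysystemlog`): zipping the two line-aligned files pairs each command with
its outcome.

```python
# Sketch: the ".clean" log lists every command that ran; the error log has
# exactly one line per command -- blank on success, an error line on failure.
clean_log = [
    "Running making the compiler: make -j8",
    "Running running tests: start_test ...",
]
error_log = [
    "",                                             # first command succeeded
    "Error when running running tests (code 256)",  # second command failed
]

for command, error in zip(clean_log, error_log):
    status = "FAIL" if error.strip() else "PASS"
    print(f"{status}: {command}")
```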

The net result is that a _lot_ more failures are detected and reported.
Given the exact same set of failures:
* A failing chplcheck test
* A `sub_test` that crashes after 5 tests
* One successful and one failing system command in `nightly` 
* A failing CLS test

The previous script generated the following report:
<img width="481" alt="Screenshot 2025-02-24 at 3 45 38 PM" src="https://github.com/user-attachments/assets/0449a19c-5e72-4451-982d-866d03180f52" />

The new script generates the following, much more robust, report:
<img width="849" alt="Screenshot 2025-02-24 at 3 45 51 PM" src="https://github.com/user-attachments/assets/85afb801-4632-49da-b377-b5d382f3d038" />

Reviewed by @jabraham17 -- thanks!

# Testing
- manual invocation of `nightly`
DanilaFe authored Feb 25, 2025
2 parents b57bff8 + f9bd88a commit 0e5c289
Showing 12 changed files with 199 additions and 25 deletions.
4 changes: 4 additions & 0 deletions Makefile.devel
@@ -76,6 +76,10 @@ test-cls test-chpl-language-server: FORCE
@$(MAKE) chpl-language-server
@cd tools/chpl-language-server && $(MAKE) test-chpl-language-server

test-cls-junit test-chpl-language-server-junit: FORCE
@$(MAKE) chpl-language-server
@cd tools/chpl-language-server && $(MAKE) test-chpl-language-server-junit

run-frontend-linters: FORCE
@cd compiler && $(MAKE) run-frontend-linters

1 change: 1 addition & 0 deletions third-party/chpl-venv/test-requirements.txt
@@ -2,3 +2,4 @@ PyYAML==6.0.2
filelock==3.16.1
argcomplete==3.5.1
setuptools==75.1.0
junitparser==3.2.0
5 changes: 5 additions & 0 deletions tools/chpl-language-server/Makefile
@@ -44,6 +44,11 @@ test-chpl-language-server: chpl-language-server chpl-language-server-test-venv
$(CHPL_MAKE_HOME)/util/config/run-in-venv-with-python-bindings.bash \
$(CHPL_MAKE_PYTHON) -m pytest test/*.py

# Same as above, but generates XML report
test-chpl-language-server-junit: chpl-language-server chpl-language-server-test-venv
$(CHPL_MAKE_HOME)/util/config/run-in-venv-with-python-bindings.bash \
$(CHPL_MAKE_PYTHON) -m pytest test/*.py --junit-xml=test-chpl-language-server-report.xml

clean: clean-link clean-link-shim clean-pycache

cleanall: clean
23 changes: 20 additions & 3 deletions util/cron/nightly
@@ -457,7 +457,9 @@ if (exists($ENV{'CHPL_NIGHTLY_TEST_CONFIG_NAME'})) {
$mysystemlog = "$basetmpdir/mysystemerrs-$config_name.txt";
$emailOnError = $mysystemlog;
unlink($mysystemlog); # the file gets appended to, so clear it first.
unlink("$mysystemlog.clean"); # the file gets appended to, so clear it first.
ensureMysystemlogExists($mysystemlog); # empty (vs nonexistent) file is easier to compare against
ensureMysystemlogExists("$mysystemlog.clean"); # also create a 'clean' file which lists all the commands so that we can generate JUnit report.
$prevmysystemlog = "$cronlogdir/last-mysystemerrs-$config_name.txt";


@@ -752,8 +754,8 @@ if (exists($ENV{"CHPL_START_TEST_ARGS"})) {

if ($chplLanguageServer == 1) {
$ENV{'CHPL_HOME'} = $chplhomedir;
$testclscommand = "cd $ENV{'CHPL_HOME'} && . util/setchplenv.sh && make -j$num_procs test-chpl-language-server";
mysystem($testclscommand, "running `make test-chpl-language-server`", $emailOnError, 1);
$testclscommand = "cd $ENV{'CHPL_HOME'} && . util/setchplenv.sh && make -j$num_procs test-chpl-language-server-junit";
mysystem($testclscommand, "running `make test-chpl-language-server-junit`", $emailOnError, 1);
}

if ($runtests == 0) {
@@ -797,9 +799,15 @@ if ($runtests == 0) {
$status = mysystem("cd $testdir && ../util/test/paratest.server -nodefile $parnodefile $testflags", "running parallel tests", $ignoreErrors);
}


$endtime = localtime;

#
# Incorporate test results from the language server into the JUnit XML report
#
if ($junit_xml == 1 and $chplLanguageServer == 1) {
mysystem("cd $chplhomedir && ./util/test/merge_junit_xmls chapel-tests.xml tools/chpl-language-server/test-chpl-language-server-report.xml", "incorporating CLS JUnit results", $ignoreErrors);
}

#
# Sync performance graphs to dreamhost.
#
@@ -881,6 +889,15 @@ if ($runtests == 0) {
}
}

#
# Generate a new JUnit XML report for any failures that occurred in the
# $mysystemlog (and thus should be emailed about). This way, we can make the
# job status "unstable".
#

mysystem("cd $chplhomedir && ./util/test/convert_nightly_system_command_log_to_junit_xml.py $mysystemlog.clean $mysystemlog mysystemlog.xml", "generating JUnit results from system commands", $ignoreErrors);
mysystem("cd $chplhomedir && ./util/test/merge_junit_xmls chapel-tests.xml mysystemlog.xml", "incorporating JUnit results from system commands", $ignoreErrors);

# FIXME: Pass correct args here!
# `$chplhomedir/util/cron/nightly_email.pl $status "$rawsummary" "$sortedsummary" "$prevsummary" "$mailer" "$nochangerecipient" "$recipient" "$subjectid" "$config_name" "$revision" "$rawlog" "$starttime" "$endtime" "$crontab" "$testdirs" $debug`;
# Write the test results to the summary log.
2 changes: 1 addition & 1 deletion util/cron/nightly_email_file.pm
@@ -43,7 +43,7 @@ sub nightly_email_file{
# Nothing sorts the system log for us; do that now, so that 'comm' is
# given a lexically sorted input as it expects.
$sortedmysystemlog = $mysystemlog . ".sorted";
`LC_ALL=C sort $mysystemlog > $sortedmysystemlog`;
`LC_ALL=C sort $mysystemlog | grep -v '^\\s*\$' > $sortedmysystemlog`;

# Ensure the "previous" summary exists, e.g. if this is the first run of the
# configuration it won't.
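
Since successes are now recorded as blank lines, the sorted log must drop
them before the `comm` comparison; that is what the added `grep -v '^\s*$'`
does. A rough Python equivalent (a sketch only, with a hypothetical file
name):

```python
# Mirror of: LC_ALL=C sort mysystemerrs.txt | grep -v '^\s*$'
# Sort the system log and drop the blank "success" placeholder lines.
with open("mysystemerrs.txt") as f:  # hypothetical path
    lines = sorted(line for line in f if line.strip())
print("".join(lines), end="")
```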
31 changes: 24 additions & 7 deletions util/cron/nightlysubs.pm
@@ -26,6 +26,19 @@ sub mysystem {
$onerror = $_[2];
$showcommand = $_[3];

my $shouldExitOnError = $onerror eq $exitOnError;
my $shouldIgnoreErrors = $onerror eq $ignoreErrors;
my $shouldWriteToFile = !$shouldExitOnError && !$shouldIgnoreErrors;

# Save the command into a file if we're emailing about it, so that
# we have a clean list of commands that were run.
# We can then compare it against the list of errors and generate a report.
if ($shouldWriteToFile) {
open(my $SF, '>>', "$onerror.clean") or die "Could not open file '$onerror.clean' $!";
print $SF "Running $errorname: $command\n";
close($SF);
}

if ($showcommand) { print "Executing $command\n"; }
my $status = system($command);
if ($status != 0) {
@@ -36,14 +49,18 @@
print "Error $errorname: $status\n";
if ($onerror eq $exitOnError) {
exit 1;
} elsif ($onerror eq $ignoreErrors) {
# Do nothing
} else {
open(my $SF, '>>', $onerror) or die "Could not open file '$onerror' $!";
print $SF "Error $errorname: $status\n";
close($SF);
}
}

if ($shouldWriteToFile) {
my $toWrite = "\n";
if ($status != 0) {
$toWrite = "Error when running $errorname (code $status)\n";
}
open(my $SF, '>>', "$onerror") or die "Could not open file '$onerror' $!";
print $SF $toWrite;
close($SF);
}
$status;
}

@@ -91,7 +108,7 @@ sub ensureSummaryExists {
}

sub ensureMysystemlogExists {
$mysystemlog = $_[0];
my $mysystemlog = $_[0];
if (! -r $mysystemlog) {
print "Creating $mysystemlog\n";
`touch $mysystemlog`
45 changes: 45 additions & 0 deletions util/test/convert_nightly_system_command_log_to_junit_xml.py
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import xml.etree.ElementTree as XML

parser = argparse.ArgumentParser(description='Convert nightly system command logs to JUnit XML.')
parser.add_argument('cleanlog', type=str, help='Log listing all commands that were executed')
parser.add_argument('errorlog', type=str, help='Log listing errors in commands that were executed')
parser.add_argument('output', type=str, help='Output JUnit XML file')

args = parser.parse_args()

with open(args.cleanlog, 'r') as f:
cleanlog = f.readlines()

with open(args.errorlog, 'r') as f:
errorlog = f.readlines()

test_suites = XML.Element('testsuites')
test_suite = XML.SubElement(test_suites, 'testsuite')

# Error lines are empty when no error occurred.
num_errors = 0
for (cleanline, errline) in zip(cleanlog, errorlog):
cleanline = cleanline.strip()
errline = errline.strip()

test_case = XML.SubElement(test_suite, 'testcase')
test_case.set('name', cleanline)
test_case.set('classname', 'nightly system command')
test_case.set('time', '0')

if errline:
failure = XML.SubElement(test_case, 'failure')
failure.set('message', errline)
num_errors += 1

test_suite.set('name', 'nightly system commands')
test_suite.set('errors', str(num_errors))
test_suite.set('failures', str(num_errors))
test_suite.set('tests', str(len(cleanlog)))

tree = XML.ElementTree(test_suites)
tree.write(args.output)
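
The script above is invoked from `nightly` (see the earlier hunk); a
hypothetical equivalent invocation from Python, assuming the aligned logs
exist in the current directory and using made-up log file names:

```python
import subprocess

# Convert the aligned command/error logs into a JUnit XML report
# (log file names here are hypothetical).
subprocess.run(
    ["./util/test/convert_nightly_system_command_log_to_junit_xml.py",
     "mysystemerrs.txt.clean", "mysystemerrs.txt", "mysystemlog.xml"],
    check=True,
)
```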
63 changes: 59 additions & 4 deletions util/test/convert_start_test_log_to_junit_xml.py
@@ -23,6 +23,23 @@

DEBUG = False

SKIP_MARKERS = (
"[Skipping directory",
"[Skipping test",
"[Skipping noperf test",
"[Skipping notest test",
"[Skipping future test",
"[Skipping non-future test",
"[Skipping future test without a skipif",
"[Skipping c test",
"[Skipping c++ test",
"[Skipping multilocale-only c test",
"[Skipping multilocale-only c++ test",
"[Skipping interpretation of",
)

SUBTEST_STARTS = ('[Starting subtest - ', '[Error running sub_test')
SUBTEST_ENDS = ('[Finished subtest ', '[Error running sub_test')

def main():
"""Parse cli arguments and convert a start_test log file to jUnit xml
@@ -50,7 +67,9 @@ def _create_junit_report(test_cases, junit_file):
:arg junit_file: filename to write the jUnit XML report
"""
logging.debug('Creating jUnit XML report at: {0}'.format(junit_file))
test_suite = XML.Element('testsuite')
test_suites = XML.Element('testsuites')
test_suite = XML.SubElement(test_suites, 'testsuite')
num_errors = 0

for test_case in test_cases:
case_elem = XML.SubElement(test_suite, 'testcase')
@@ -66,12 +85,18 @@ def _create_junit_report(test_cases, junit_file):
error_elem = XML.SubElement(case_elem, 'error')
error_elem.set('message', test_error['message'])
error_elem.text = test_error['content']
num_errors += 1

system_out = XML.SubElement(case_elem, 'system-out')
system_out.text = test_case['system-out']

test_suite.set('errors', str(num_errors))
test_suite.set('failures', str(num_errors))
test_suite.set('tests', str(len(test_cases)))
test_suite.set('name', 'start_test results')

encoding = "unicode" if sys.version_info[0] >= 3 else "us-ascii"
xml_content = XML.tostring(test_suite, encoding=encoding)
xml_content = XML.tostring(test_suites, encoding=encoding)
xml_content = _clean_xml(xml_content)
with open(junit_file, 'w') as fp:
fp.write(xml_content)
@@ -93,10 +118,16 @@ def _parse_start_test_log(start_test_log):
logging.debug('Read {0} lines from "{1}".'.format(
len(start_test_lines), start_test_log))

# To catch as many test results as we can despite sub_test failures, the
# code below deliberately accepts both empty and non-empty "Error running
# sub_test" blocks. Those failures are reported again in the summary, so to
# avoid duplicates we keep track of the sub_test failures we have seen.
seen_sub_test_failures = set()

test_cases = []
while len(start_test_lines) > 0:
subtest_start, subtest_end = _get_block(
start_test_lines, '[Starting subtest - ', '[Finished subtest ')
start_test_lines, SUBTEST_STARTS, SUBTEST_ENDS)

# No more sub_tests; delete the remaining lines and finish up.
if subtest_start == -1:
@@ -108,6 +139,16 @@
sub_test_lines = start_test_lines[subtest_start:subtest_end+1:1]
del start_test_lines[:subtest_end+1]

subtest_error = None
subtest_file = None
if sub_test_lines[-1].startswith('[Error running sub_test'):
match = re.match(r'\[Error running sub_test \(code [\d]+\) (?:in|for) (.*)\]', sub_test_lines[-1])
if match and match.group(1) not in seen_sub_test_failures:
subtest_error = sub_test_lines[-1]
subtest_file = match.group(1)
seen_sub_test_failures.add(subtest_file)

lines_after_no_more_tests = []
while len(sub_test_lines) > 0:
test_start, test_end = _get_block(
sub_test_lines,
@@ -116,7 +157,7 @@
test_start_skip, test_end_skip = _get_block(
sub_test_lines,
'[test: ',
'[Skipping')
SKIP_MARKERS)

test_skipped = False
if test_end_skip != -1 and (test_end == -1 or test_end_skip < test_end):
@@ -149,6 +190,7 @@

# No more test cases; delete remaining lines and finish up.
if test_start == -1:
lines_after_no_more_tests = [l for l in sub_test_lines]
del sub_test_lines[:]
continue

@@ -178,6 +220,19 @@

test_cases.append(test_case)

# If we got a subtest error, consider all the remaining output from the
# subtest as output from a "test" that failed to run (the subtest itself).
if subtest_error is not None:
test_case = {
'name': 'subtest script',
'classname': subtest_file,
'time': 0.0,
'error': {'message': 'Error running subtest script', 'content': subtest_error},
'skipped': False,
'system-out': "\n".join(lines_after_no_more_tests),
}
test_cases.append(test_case)

logging.info('Parsed {0} test cases from "{1}".'.format(
len(test_cases), start_test_log))
return test_cases
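
The "[Error running sub_test ...]" lines emitted by `start_test` double as
block delimiters above; a minimal sketch of how one such line is recognized
and attributed, using the same regex as the diff (the path is hypothetical):

```python
import re

# A log line in the format start_test now emits when sub_test fails.
line = "[Error running sub_test (code 1) for test/studies]"  # hypothetical path

match = re.match(r'\[Error running sub_test \(code [\d]+\) (?:in|for) (.*)\]', line)
if match:
    print("failed sub_test:", match.group(1))  # -> test/studies
```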
6 changes: 6 additions & 0 deletions util/test/merge_junit_xmls
@@ -0,0 +1,6 @@
#!/usr/bin/env sh

# get the chpl home directory
FIND_CHPL_HOME=$(cd $(dirname $0) ; cd ../..; pwd)

$FIND_CHPL_HOME/util/test/run-in-test-venv.bash $FIND_CHPL_HOME/util/test/merge_junit_xmls.py "$@"
26 changes: 26 additions & 0 deletions util/test/merge_junit_xmls.py
@@ -0,0 +1,26 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Combine reports A.xml, B.xml, C.xml into a combined A.xml"""

import argparse
import os
import sys

from junitparser import JUnitXml, TestCase, TestSuite

def merge_junit_xmls(xml_files):
if not xml_files:
return

merged = sum((JUnitXml.fromfile(xml) for xml in xml_files), JUnitXml())
merged.write(xml_files[0])

def main():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('xml_files', nargs='+', help='JUnit XML files to merge')
args = parser.parse_args()
merge_junit_xmls(args.xml_files)

if __name__ == '__main__':
main()
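
For context, `junitparser.JUnitXml` overloads `+`, which is what the
`sum(...)` above relies on; a minimal usage sketch (file names hypothetical):

```python
from junitparser import JUnitXml

# '+' combines the suites from both reports into a single document.
combined = JUnitXml.fromfile("chapel-tests.xml") + JUnitXml.fromfile("cls-report.xml")
combined.write("chapel-tests.xml")
```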
16 changes: 7 additions & 9 deletions util/test/start_test.py
@@ -212,11 +212,10 @@ def test_file(test):
if args.performance or not args.gen_graphs:
error = run_sub_test(test)

# check for errors - 173 is an internal sub_test error that would
# have already reported.
if error != 0 and error != 173:
logger.write("[Error running sub_test for {0}]"
.format(path_to_test))
# check for errors:
if error != 0:
logger.write("[Error running sub_test (code {1}) for {0}]"
.format(path_to_test, error))

if args.progress:
sys.stderr.write("[done]\n")
@@ -343,10 +342,9 @@ def test_directory(test, test_type):
if not args.clean_only:
# run all tests in dir
error = run_sub_test()
# check for errors - 173 is an internal sub_test
# error that would have already reported.
if not error == 0 and not error == 173:
logger.write("[Error {1} running sub_test in {0}]"
# check for errors:
if not error == 0:
logger.write("[Error running sub_test (code {1}) in {0}]"
.format(root, error))

# let user know no tests were found
2 changes: 1 addition & 1 deletion util/test/sub_test.py
@@ -1540,7 +1540,7 @@ def run_compileline(flag, lookingfor):
numlocales, maxLocalesAvailable))
continue
if os.getenv('CHPL_TEST_MULTILOCALE_ONLY') and (numlocales <= 1) and not is_ml_c_or_cpp_test:
sys.stdout.write('[Skipping {0} because it does not '
sys.stdout.write('[Skipping test {0} because it does not '
'use more than one locale]\n'
.format(os.path.join(localdir, test_filename)))
continue
