From 8116c78ffddc71dec8f793339648a5239a5d9643 Mon Sep 17 00:00:00 2001
From: Simon Glass <sjg@chromium.org>
Date: Sun, 11 Apr 2021 16:27:27 +1200
Subject: [PATCH] buildman: Handle exceptions in threads gracefully

There have been at least a few cases where an exception has occurred in a
thread and resulted in buildman hanging: running out of disk space and
getting a unicode error.

Handle these by collecting a list of exceptions, printing them out and
reporting failure if any are found. Add a test for this.

Signed-off-by: Simon Glass <sjg@chromium.org>
---
 tools/buildman/builder.py       | 22 +++++++++++++++++-----
 tools/buildman/builderthread.py | 14 +++++++++++++-
 tools/buildman/control.py       | 16 +++++++++++-----
 tools/buildman/func_test.py     | 15 +++++++++++++++
 4 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/tools/buildman/builder.py b/tools/buildman/builder.py
index be8a8fa13a..ce852eb03a 100644
--- a/tools/buildman/builder.py
+++ b/tools/buildman/builder.py
@@ -182,6 +182,7 @@ class Builder:
             only useful for testing in-tree builds.
         work_in_output: Use the output directory as the work directory and
             don't write to a separate output directory.
+        thread_exceptions: List of exceptions raised by thread jobs
 
     Private members:
         _base_board_dict: Last-summarised Dict of boards
@@ -235,7 +236,8 @@ class Builder:
                  no_subdirs=False, full_path=False, verbose_build=False,
                  mrproper=False, per_board_out_dir=False,
                  config_only=False, squash_config_y=False,
-                 warnings_as_errors=False, work_in_output=False):
+                 warnings_as_errors=False, work_in_output=False,
+                 test_thread_exceptions=False):
         """Create a new Builder object
 
         Args:
@@ -262,6 +264,9 @@ class Builder:
             warnings_as_errors: Treat all compiler warnings as errors
             work_in_output: Use the output directory as the work directory and
                 don't write to a separate output directory.
+            test_thread_exceptions: Uses for tests only, True to make the
+                threads raise an exception instead of reporting their result.
+                This simulates a failure in the code somewhere
         """
         self.toolchains = toolchains
         self.base_dir = base_dir
@@ -311,13 +316,16 @@ class Builder:
         self._re_migration_warning = re.compile(r'^={21} WARNING ={22}\n.*\n=+\n',
                                                 re.MULTILINE | re.DOTALL)
 
+        self.thread_exceptions = []
+        self.test_thread_exceptions = test_thread_exceptions
         if self.num_threads:
             self._single_builder = None
             self.queue = queue.Queue()
             self.out_queue = queue.Queue()
             for i in range(self.num_threads):
-                t = builderthread.BuilderThread(self, i, mrproper,
-                        per_board_out_dir)
+                t = builderthread.BuilderThread(
+                        self, i, mrproper, per_board_out_dir,
+                        test_exception=test_thread_exceptions)
                 t.setDaemon(True)
                 t.start()
                 self.threads.append(t)
@@ -1676,6 +1684,7 @@ class Builder:
             Tuple containing:
                 - number of boards that failed to build
                 - number of boards that issued warnings
+                - list of thread exceptions raised
         """
         self.commit_count = len(commits) if commits else 1
         self.commits = commits
@@ -1689,7 +1698,7 @@ class Builder:
         Print('\rStarting build...', newline=False)
         self.SetupBuild(board_selected, commits)
         self.ProcessResult(None)
-
+        self.thread_exceptions = []
         # Create jobs to build all commits for each board
         for brd in board_selected.values():
             job = builderthread.BuilderJob()
@@ -1728,5 +1737,8 @@ class Builder:
             rate = float(self.count) / duration.total_seconds()
             msg += ', duration %s, rate %1.2f' % (duration, rate)
         Print(msg)
+        if self.thread_exceptions:
+            Print('Failed: %d thread exceptions' % len(self.thread_exceptions),
+                  colour=self.col.RED)
 
-        return (self.fail, self.warned)
+        return (self.fail, self.warned, self.thread_exceptions)
diff --git a/tools/buildman/builderthread.py b/tools/buildman/builderthread.py
index 76ffbb66be..ddb3eab8c0 100644
--- a/tools/buildman/builderthread.py
+++ b/tools/buildman/builderthread.py
@@ -97,12 +97,15 @@ class BuilderThread(threading.Thread):
         test_exception: Used for testing; True to raise an exception instead of
             reporting the build result
     """
+    def __init__(self, builder, thread_num, mrproper, per_board_out_dir,
+                 test_exception=False):
         """Set up a new builder thread"""
         threading.Thread.__init__(self)
         self.builder = builder
         self.thread_num = thread_num
         self.mrproper = mrproper
         self.per_board_out_dir = per_board_out_dir
+        self.test_exception = test_exception
 
     def Make(self, commit, brd, stage, cwd, *args, **kwargs):
         """Run 'make' on a particular commit and board.
@@ -449,7 +452,12 @@ class BuilderThread(threading.Thread):
 
         Args:
             result: CommandResult object containing the results of the build
+
+        Raises:
+            ValueError if self.test_exception is true (for testing)
         """
+        if self.test_exception:
+            raise ValueError('test exception')
         if self.thread_num != -1:
             self.builder.out_queue.put(result)
         else:
@@ -547,5 +555,9 @@ class BuilderThread(threading.Thread):
         """
         while True:
             job = self.builder.queue.get()
-            self.RunJob(job)
+            try:
+                self.RunJob(job)
+            except Exception as e:
+                print('Thread exception:', e)
+                self.builder.thread_exceptions.append(e)
             self.builder.queue.task_done()
diff --git a/tools/buildman/control.py b/tools/buildman/control.py
index 5fcfba7ca3..a98d1b4c06 100644
--- a/tools/buildman/control.py
+++ b/tools/buildman/control.py
@@ -110,7 +110,7 @@ def ShowToolchainPrefix(boards, toolchains):
     return None
 
 def DoBuildman(options, args, toolchains=None, make_func=None, boards=None,
-               clean_dir=False):
+               clean_dir=False, test_thread_exceptions=False):
     """The main control code for buildman
 
     Args:
@@ -126,6 +126,9 @@ def DoBuildman(options, args, toolchains=None, make_func=None, boards=None,
                 boards. If this is None it will be created and scanned.
         clean_dir: Used for tests only, indicates that the existing output_dir
             should be removed before starting the build
+        test_thread_exceptions: Uses for tests only, True to make the threads
+            raise an exception instead of reporting their result. This simulates
+            a failure in the code somewhere
     """
     global builder
 
@@ -330,7 +333,8 @@ def DoBuildman(options, args, toolchains=None, make_func=None, boards=None,
             config_only=options.config_only,
             squash_config_y=not options.preserve_config_y,
             warnings_as_errors=options.warnings_as_errors,
-            work_in_output=options.work_in_output)
+            work_in_output=options.work_in_output,
+            test_thread_exceptions=test_thread_exceptions)
     builder.force_config_on_failure = not options.quick
     if make_func:
         builder.do_make = make_func
@@ -370,9 +374,11 @@ def DoBuildman(options, args, toolchains=None, make_func=None, boards=None,
         if options.summary:
             builder.ShowSummary(commits, board_selected)
         else:
-            fail, warned = builder.BuildBoards(commits, board_selected,
-                                options.keep_outputs, options.verbose)
-            if fail:
+            fail, warned, excs = builder.BuildBoards(
+                commits, board_selected, options.keep_outputs, options.verbose)
+            if excs:
+                return 102
+            elif fail:
                 return 100
             elif warned and not options.ignore_warnings:
                 return 101
diff --git a/tools/buildman/func_test.py b/tools/buildman/func_test.py
index c6997d1ee2..61e3012d2b 100644
--- a/tools/buildman/func_test.py
+++ b/tools/buildman/func_test.py
@@ -16,6 +16,7 @@ from buildman import toolchain
 from patman import command
 from patman import gitutil
 from patman import terminal
+from patman import test_util
 from patman import tools
 
 settings_data = '''
@@ -219,6 +220,8 @@ class TestFunctional(unittest.TestCase):
         return command.RunPipe([[self._buildman_pathname] + list(args)],
                 capture=True, capture_stderr=True)
 
+    def _RunControl(self, *args, boards=None, clean_dir=False,
+                    test_thread_exceptions=False):
         """Run buildman
 
         Args:
@@ -226,6 +229,9 @@ class TestFunctional(unittest.TestCase):
             boards:
             clean_dir: Used for tests only, indicates that the existing output_dir
                 should be removed before starting the build
+            test_thread_exceptions: Uses for tests only, True to make the threads
+                raise an exception instead of reporting their result. This simulates
+                a failure in the code somewhere
 
         Returns:
             result code from buildman
@@ -234,6 +240,8 @@ class TestFunctional(unittest.TestCase):
         options, args = cmdline.ParseArgs()
         result = control.DoBuildman(options, args, toolchains=self._toolchains,
                 make_func=self._HandleMake, boards=boards or self._boards,
+                clean_dir=clean_dir,
+                test_thread_exceptions=test_thread_exceptions)
         self._builder = control.builder
         return result
 
@@ -597,3 +605,10 @@ class TestFunctional(unittest.TestCase):
         with self.assertRaises(SystemExit) as e:
             self._RunControl('-w', clean_dir=False)
         self.assertIn("specify -o", str(e.exception))
+
+    def testThreadExceptions(self):
+        """Test that exceptions in threads are reported"""
+        with test_util.capture_sys_output() as (stdout, stderr):
+            self.assertEqual(102, self._RunControl('-o', self._output_dir,
+                                                   test_thread_exceptions=True))
+        self.assertIn('Thread exception: test exception', stdout.getvalue())
-- 
2.39.5