framework/test/base.py: use subprocess32 for timeouts.

Subprocess32 provides a backport of python 3.2's subprocess module, which has a timeout parameter for Popen.communicate. When the timeout runs out then an exception is raised, and when that exception is caught we can kill the process. This is fairly similar to the way the current timeout mechanism works, except that the current mechanism is not thread safe. Since one of the major features of piglit is that it offer's processes isolated concurrency of tests, it makes sense to make the effort to provide a timeout mechanism that supports concurrency. Unfortunately there isn't a good cross platform mechanism for this in python 2.x, even with subprocess 32 only *nix systems are supported, not windows. The big advantage of this is it allows consistent behavior between python 2.x and 3.x as we look toward the future and the possibility of using a hybrid approach to transition to python 3.x while maintaining 2.x support until it's not needed. This patch look pretty substantial. It's not as big as it looks, since it adds some fairly big tests. Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
author: Dylan Baker <baker.dylan.c@gmail.com> 2015-10-22 14:39:14 -0700
committer: Dylan Baker <baker.dylan.c@gmail.com> 2016-01-21 14:13:33 -0800
commit: 09fc4303c9f300fe47a1a877eb3f4f3f15a66b68 (patch)
tree: ec964b2c7c8ed31f030ae8cfff22ff5dd34578ba
parent: e0114dc63ace991e6d91177456cd60c78bee9c45 (diff)
3 files changed, 179 insertions, 86 deletions
diff --git a/framework/test/base.py b/framework/test/base.py
index 417fd4e0b..8fe29eef9 100644
--- a/framework/test/base.py
+++ b/framework/test/base.py
@@ -25,16 +25,43 @@
 from __future__ import print_function, absolute_import
 import errno
 import os
-import subprocess
 import time
 import sys
 import traceback
-from datetime import datetime
-import threading
-import signal
 import itertools
 import abc
 import copy
+import signal
+import warnings
+
+try:
+    # subprocess32 only supports *nix systems, this is important because
+    # "start_new_session" is not a valid argument on windows
+
+    import subprocess32 as subprocess
+    _EXTRA_POPEN_ARGS = {'start_new_session': True}
+except ImportError:
+    # If there is no timeout support, fake it. Add a TimeoutExpired exception
+    # and a Popen that accepts a timeout parameter (and ignores it), then
+    # shadow the actual Popen with this wrapper.
+
+    import subprocess
+
+    class TimeoutExpired(Exception):
+        pass
+
+    class Popen(subprocess.Popen):
+        """Sublcass of Popen that accepts and ignores a timeout argument."""
+        def communicate(self, *args, **kwargs):
+            if 'timeout' in kwargs:
+                del kwargs['timeout']
+            return super(Popen, self).communicate(*args, **kwargs)
+
+    subprocess.TimeoutExpired = TimeoutExpired
+    subprocess.Popen = Popen
+    _EXTRA_POPEN_ARGS = {}
+
+    warnings.warn('Timeouts are not available')
 
 from framework import exceptions, options
 from framework.results import TestResult
@@ -62,56 +89,6 @@ class TestRunError(exceptions.PiglitException):
         self.status = status
 
 
-class ProcessTimeout(threading.Thread):
-    """ Timeout class for test processes
-
-    This class is for terminating tests that run for longer than a certain
-    amount of time. Create an instance by passing it a timeout in seconds
-    and the Popen object that is running your test. Then call the start
-    method (inherited from Thread) to start the timer. The Popen object is
-    killed if the timeout is reached and it has not completed. Wait for the
-    outcome by calling the join() method from the parent.
-
-    """
-
-    def __init__(self, timeout, proc):
-        threading.Thread.__init__(self)
-        self.proc = proc
-        self.timeout = timeout
-        self.status = 0
-
-    def run(self):
-        start_time = datetime.now()
-        delta = 0
-
-        # poll() returns the returncode attribute, which is either the return
-        # code of the child process (which could be zero), or None if the
-        # process has not yet terminated.
-
-        while delta < self.timeout and self.proc.poll() is None:
-            time.sleep(1)
-            delta = (datetime.now() - start_time).total_seconds()
-
-        # if the test is not finished after timeout, first try to terminate it
-        # and if that fails, send SIGKILL to all processes in the test's
-        # process group
-
-        if self.proc.poll() is None:
-            self.status = 1
-            self.proc.terminate()
-            time.sleep(5)
-
-        if self.proc.poll() is None:
-            self.status = 2
-            if hasattr(os, 'killpg'):
-                os.killpg(self.proc.pid, signal.SIGKILL)
-            self.proc.wait()
-
-    def join(self):
-        threading.Thread.join(self)
-        return self.status
-
-
 def is_crash_returncode(returncode):
     """Determine whether the given process return code correspond to a
     crash.
@@ -147,10 +124,10 @@ class Test(object):
     """
     __metaclass__ = abc.ABCMeta
     __slots__ = ['run_concurrent', 'env', 'result', 'cwd', '_command',
-                 '__proc_timeout']
-    timeout = 0
+                 'timeout']
+    timeout = None
 
-    def __init__(self, command, run_concurrent=False):
+    def __init__(self, command, run_concurrent=False, timeout=None):
         assert isinstance(command, list), command
 
         self.run_concurrent = run_concurrent
@@ -158,7 +135,9 @@ class Test(object):
         self.env = {}
         self.result = TestResult()
         self.cwd = None
-        self.__proc_timeout = None
+        if timeout is not None:
+            assert isinstance(timeout, int)
+            self.timeout = timeout
 
     def execute(self, path, log, dmesg):
         """ Run a test
@@ -205,11 +184,7 @@ class Test(object):
         """Convert the raw output of the test into a form piglit understands.
         """
         if is_crash_returncode(self.result.returncode):
-            # check if the process was terminated by the timeout
-            if self.timeout > 0 and self.__proc_timeout.join() > 0:
-                self.result.result = 'timeout'
-            else:
-                self.result.result = 'crash'
+            self.result.result = 'crash'
         elif self.result.returncode != 0:
             if self.result.result == 'pass':
                 self.result.result = 'warn'
@@ -258,10 +233,6 @@ class Test(object):
         """
         pass
 
-    def __set_process_group(self):
-        if hasattr(os, 'setpgrp'):
-            os.setpgrp()
-
     def _run_command(self):
         """ Run the test command and get the result
 
@@ -288,11 +259,6 @@ class Test(object):
                                           self.env.iteritems()):
             fullenv[key] = str(value)
 
-        # preexec_fn is not supported on Windows platforms
-        if sys.platform == 'win32':
-            preexec_fn = None
-        else:
-            preexec_fn = self.__set_process_group
 
         try:
             proc = subprocess.Popen(self.command,
@@ -301,19 +267,10 @@ class Test(object):
                                     cwd=self.cwd,
                                     env=fullenv,
                                     universal_newlines=True,
-                                    preexec_fn=preexec_fn)
-
-            # create a ProcessTimeout object to watch out for test hang if the
-            # process is still going after the timeout, then it will be killed
-            # forcing the communicate function (which is a blocking call) to
-            # return
-            if self.timeout > 0:
-                self.__proc_timeout = ProcessTimeout(self.timeout, proc)
-                self.__proc_timeout.start()
+                                    **_EXTRA_POPEN_ARGS)
 
             self.result.pid = proc.pid
-
-            out, err = proc.communicate()
+            out, err = proc.communicate(timeout=self.timeout)
             returncode = proc.returncode
         except OSError as e:
             # Different sets of tests get built under different build
@@ -323,6 +280,27 @@ class Test(object):
                 raise TestRunError("Test executable not found.\n", 'skip')
             else:
                 raise e
+        except subprocess.TimeoutExpired:
+            # This can only be reached if subprocess32 is present, since
+            # TimeoutExpired is never raised by the fallback code.
+
+            proc.terminate()
+
+            # XXX: A number of the os calls in this block don't work on windows,
+            # when porting to python 3 this will likely require an
+            # "if windows/else" block
+            if proc.poll() is None:
+                time.sleep(3)
+                os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+
+            # Since the process isn't running it's safe to get any remaining
+            # stdout/stderr values out and store them.
+            self.result.out, self.result.err = proc.communicate()
+
+            raise TestRunError(
+                'Test run time exceeded timeout value ({} seconds)\n'.format(
+                    self.timeout),
+                'timeout')
 
         # The setter handles the bytes/unicode conversion
         self.result.out = out
diff --git a/tox.ini b/tox.ini
index 81cdbcc99..072141766 100644
--- a/tox.ini
+++ b/tox.ini
@@ -13,6 +13,8 @@ deps =
     accel: simplejson
     accel: lxml
     accel: backports.lzma
+    accel: subprocess32
+    accel: psutil
     generator: six
     generator: numpy
 commands = 
diff --git a/unittests/base_tests.py b/unittests/base_tests.py
index 9fcf8eb61..68a9c93cc 100644
--- a/unittests/base_tests.py
+++ b/unittests/base_tests.py
@@ -21,19 +21,27 @@
 """ Tests for the exectest module """
 
 from __future__ import print_function, absolute_import
+import tempfile
+import textwrap
+import os
 
 import mock
 import nose.tools as nt
 from nose.plugins.attrib import attr
 
+try:
+    import psutil
+except ImportError:
+    pass
+
 from . import utils
+from .status_tests import PROBLEMS, STATUSES
 from framework.test.base import (
     Test,
     TestRunError,
     ValgrindMixin,
     WindowResizeMixin,
 )
-from .status_tests import PROBLEMS, STATUSES
 from framework.options import _Options as Options
 
 # pylint: disable=invalid-name
@@ -71,8 +79,111 @@ def test_run_return_early():
 
 
 @attr('slow')
+@nt.timed(6)
+def test_timeout_kill_children():
+    """test.base.Test: kill children if terminate fails
+
+    This creates a process that forks multiple times, and then checks that the
+    children have been killed.
+
+    This test could leave processes running if it fails.
+
+    """
+    utils.module_check('subprocess32')
+    utils.module_check('psutil')
+
+    import subprocess32  # pylint: disable=import-error
+
+    class PopenProxy(object):
+        """An object that proxies Popen, and saves the Popen instance as an
+        attribute.
+
+        This is useful for testing the Popen instance.
+
+        """
+        def __init__(self):
+            self.popen = None
+
+        def __call__(self, *args, **kwargs):
+            self.popen = subprocess32.Popen(*args, **kwargs)
+
+            # if commuincate cis called successfully then the proc will be
+            # reset to None, whic will make the test fail.
+            self.popen.communicate = mock.Mock(return_value=('out', 'err'))
+
+            return self.popen
+
+    with tempfile.NamedTemporaryFile() as f:
+        # Create a file that will be executed as a python script
+        # Create a process with two subproccesses (not threads) that will run
+        # for a long time.
+        f.write(textwrap.dedent("""\
+            import time
+            from multiprocessing import Process
+
+            def p():
+                for _ in range(100):
+                    time.sleep(1)
+
+            a = Process(target=p)
+            b = Process(target=p)
+            a.start()
+            b.start()
+            a.join()
+            b.join()
+        """))
+        f.seek(0)  # we'll need to read the file back
+
+        # Create an object that will return a popen object, but also store it
+        # so we can access it later
+        proxy = PopenProxy()
+
+        test = TimeoutTest(['python', f.name])
+        test.timeout = 1
+
+        # mock out subprocess.Popen with our proxy object
+        with mock.patch('framework.test.base.subprocess') as mock_subp:
+            mock_subp.Popen = proxy
+            mock_subp.TimeoutExpired = subprocess32.TimeoutExpired
+            test.run()
+
+        # Check to see if the Popen has children, even after it should have
+        # recieved a TimeoutExpired.
+        proc = psutil.Process(os.getsid(proxy.popen.pid))
+        children = proc.children(recursive=True)
+
+        if children:
+            # If there are still running children attempt to clean them up,
+            # starting with the final generation and working back to the first
+            for child in reversed(children):
+                child.kill()
+
+            raise utils.TestFailure(
+                'Test process had children when it should not')
+
+
+@attr('slow')
+@nt.timed(6)
 def test_timeout():
-    """test.base.Test.run(): Sets status to 'timeout' when timeout exceeded"""
+    """test.base.Test: Stops running test after timeout expires
+
+    This is a little bit of extra time here, but without a sleep of 60 seconds
+    if the test runs 5 seconds it's run too long
+
+    """
+    utils.module_check('subprocess32')
+    utils.binary_check('sleep', 1)
+
+    test = TimeoutTest(['sleep', '60'])
+    test.timeout = 1
+    test.run()
+
+
+@attr('slow')
+@nt.timed(6)
+def test_timeout_timeout():
+    """test.base.Test: Sets status to 'timeout' when timeout exceeded"""
+    utils.module_check('subprocess32')
     utils.binary_check('sleep', 1)
 
     test = TimeoutTest(['sleep', '60'])
@@ -81,9 +192,11 @@ def test_timeout():
     nt.eq_(test.result.result, 'timeout')
 
 
+@nt.timed(2)
 def test_timeout_pass():
-    """test.base.Test.run(): Doesn't change status when timeout not exceeded
+    """test.base.Test: Doesn't change status when timeout not exceeded
     """
+    utils.module_check('subprocess32')
     utils.binary_check('true')
 
     test = TimeoutTest(['true'])
author	Dylan Baker <baker.dylan.c@gmail.com>	2015-10-22 14:39:14 -0700
committer	Dylan Baker <baker.dylan.c@gmail.com>	2016-01-21 14:13:33 -0800
commit	09fc4303c9f300fe47a1a877eb3f4f3f15a66b68 (patch)
tree	ec964b2c7c8ed31f030ae8cfff22ff5dd34578ba
parent	e0114dc63ace991e6d91177456cd60c78bee9c45 (diff)