author     jadmanski <jadmanski@592f7852-d20e-0410-864c-8624ca9c26a4>   2009-03-25 20:07:10 +0000
committer  jadmanski <jadmanski@592f7852-d20e-0410-864c-8624ca9c26a4>   2009-03-25 20:07:10 +0000
commit     9b5ace430a54607f50fe694abf9eceaeffc626b3 (patch)
tree       5f232ae06f4e94cf73a58ee108ac958a4d9f4d03 /server
parent     f6a98e7ae84948de502948bd360b330c41046e5b (diff)
Add support for running autoserv with a "--collect-crashinfo" flag
that tells autotest to run a job that includes ONLY the crashinfo
collection. This will pull back crashinfo, and if run against a results
directory from a crashed job it will also pull back any client results
that it can find on the remote host.

Risk: Low
Visibility: Adds a new mode to autoserv for just doing crashinfo collection.

Signed-off-by: John Admanski <jadmanski@google.com>

git-svn-id: svn://test.kernel.org/autotest/trunk@2933 592f7852-d20e-0410-864c-8624ca9c26a4
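A minimal sketch of how the new mode might be invoked, pointing autoserv at the results directory of the crashed job. The --collect-crashinfo flag is added by this commit; the -m (machines) and -r (results dir) options are assumed from autoserv's existing interface, and the host name and path are placeholders:

    autoserv --collect-crashinfo -m remote-host.example.com -r /results/crashed-job

Because the results-directory-already-exists check in main() is skipped in this mode (see the autoserv hunk below), the command can be re-run against the original job's results directory, and any stranded client logs found on the host are pulled back into it.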
Diffstat (limited to 'server')
-rwxr-xr-x  server/autoserv                 10
-rw-r--r--  server/autoserv_parser.py        3
-rw-r--r--  server/hosts/remote.py          39
-rwxr-xr-x  server/server_job.py            53
-rw-r--r--  server/server_job_unittest.py    1
5 files changed, 67 insertions, 39 deletions
diff --git a/server/autoserv b/server/autoserv
index 0f228661..00fd5f44 100755
--- a/server/autoserv
+++ b/server/autoserv
@@ -61,13 +61,15 @@ def run_autoserv(pid_file_manager, results, parser):
ssh_user = parser.options.ssh_user
ssh_port = parser.options.ssh_port
ssh_pass = parser.options.ssh_pass
+ collect_crashinfo = parser.options.collect_crashinfo
# can't be both a client and a server side test
if client and server:
print "Can not specify a test as both server and client!"
sys.exit(1)
- if len(parser.args) < 1 and not (verify or repair or cleanup):
+ if len(parser.args) < 1 and not (verify or repair or cleanup
+ or collect_crashinfo):
print parser.parser.print_help()
sys.exit(1)
@@ -121,7 +123,8 @@ def run_autoserv(pid_file_manager, results, parser):
job.verify()
else:
try:
- job.run(cleanup, install_before, install_after)
+ job.run(cleanup, install_before, install_after,
+ only_collect_crashinfo=collect_crashinfo)
finally:
while job.hosts:
host = job.hosts.pop()
@@ -152,7 +155,8 @@ def main():
if not results:
results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
results = os.path.abspath(results)
- if os.path.exists(os.path.join(results, 'control.srv')):
+ resultdir_exists = os.path.exists(os.path.join(results, 'control.srv'))
+ if not parser.options.collect_crashinfo and resultdir_exists:
error = "Error: results directory already exists: %s\n" % results
sys.stderr.write(error)
sys.exit(1)
diff --git a/server/autoserv_parser.py b/server/autoserv_parser.py
index 7fe8fa85..5e43e792 100644
--- a/server/autoserv_parser.py
+++ b/server/autoserv_parser.py
@@ -100,6 +100,9 @@ class base_autoserv_parser(object):
dest="install_in_tmpdir", default=False,
help=("by default install autotest clients in "
"a temporary directory"))
+ self.parser.add_option("--collect-crashinfo", action="store_true",
+ dest="collect_crashinfo", default=False,
+ help="just run crashinfo collection")
def parse_args(self):
diff --git a/server/hosts/remote.py b/server/hosts/remote.py
index b0fb8bc9..2c4a79f1 100644
--- a/server/hosts/remote.py
+++ b/server/hosts/remote.py
@@ -1,7 +1,7 @@
"""This class defines the Remote host class, mixing in the SiteHost class
if it is available."""
-import os, time
+import os, time, pickle, logging
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils, profiler
from autotest_lib.server.hosts import base_classes, bootloader
@@ -201,18 +201,19 @@ class RemoteHost(base_classes.Host):
def get_crashinfo(self, test_start_time):
- print "Collecting crash information..."
+ logging.info("Collecting crash information...")
super(RemoteHost, self).get_crashinfo(test_start_time)
# wait for four hours, to see if the machine comes back up
current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
- print "Waiting four hours for %s to come up (%s)" % (self.hostname,
- current_time)
+ logging.info("Waiting four hours for %s to come up (%s)",
+ self.hostname, current_time)
if not self.wait_up(timeout=4*60*60):
- print "%s down, unable to collect crash info" % self.hostname
+ logging.warning("%s down, unable to collect crash info",
+ self.hostname)
return
else:
- print "%s is back up, collecting crash info" % self.hostname
+ logging.info("%s is back up, collecting crash info", self.hostname)
# find a directory to put the crashinfo into
if self.job:
@@ -226,26 +227,26 @@ class RemoteHost(base_classes.Host):
# collect various log files
log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
for log in log_files:
- print "Collecting %s..." % log
+ logging.info("Collecting %s...", log)
try:
self.get_file(log, infodir)
except Exception:
- print "Collection of %s failed. Non-fatal, continuing." % log
+ logging.warning("Collection of %s failed", log)
# collect dmesg
- print "Collecting dmesg (saved to crashinfo/dmesg)..."
+ logging.info("Collecting dmesg (saved to crashinfo/dmesg)...")
devnull = open("/dev/null", "w")
try:
try:
result = self.run("dmesg", stdout_tee=devnull).stdout
file(os.path.join(infodir, "dmesg"), "w").write(result)
except Exception, e:
- print "crashinfo collection of dmesg failed with:\n%s" % e
+ logging.warning("Collection of dmesg failed:\n%s", e)
finally:
devnull.close()
# collect any profiler data we can find
- print "Collecting any server-side profiler data lying around..."
+ logging.info("Collecting any server-side profiler data lying around...")
try:
cmd = "ls %s" % profiler.PROFILER_TMPDIR
profiler_dirs = [path for path in self.run(cmd).stdout.split()
@@ -260,7 +261,21 @@ class RemoteHost(base_classes.Host):
os.mkdir(local_path)
self.get_file(remote_path + "/", local_path)
except Exception, e:
- print "crashinfo collection of profiler data failed with:\n%s" % e
+ logging.warning("Collection of profiler data failed with:\n%s", e)
+
+
+ # collect any uncollected logs we see (for this host)
+ if self.job and os.path.exists(self.job.uncollected_log_file):
+ try:
+ logs = pickle.load(open(self.job.uncollected_log_file))
+ for hostname, remote_path, local_path in logs:
+ if hostname == self.hostname:
+ logging.info("Retrieving logs from %s:%s into %s",
+ hostname, remote_path, local_path)
+ self.get_file(remote_path + "/", local_path + "/")
+ except Exception, e:
+ logging.warning("Error while trying to collect stranded "
+ "Autotest client logs: %s", e)
def are_wait_up_processes_up(self):
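The uncollected-log handling added above depends on a small on-disk format created by server_job (see the server_job.py hunk that follows): a pickled list of (hostname, remote_path, local_path) entries describing client results that were never copied back. A rough Python sketch of reading that file, assuming it is named "uncollected_logs" inside the job's results directory (the file name and path are illustrative, not confirmed by this diff):

    import os, pickle

    results_dir = "/results/crashed-job"                      # hypothetical results dir
    log_file = os.path.join(results_dir, "uncollected_logs")  # assumed file name
    if os.path.exists(log_file):
        # each entry records a client-side results dir that was never copied back
        for hostname, remote_path, local_path in pickle.load(open(log_file)):
            print "stranded logs on %s: %s -> %s" % (hostname, remote_path, local_path)

get_crashinfo() above filters this list to entries matching its own hostname before calling get_file(), so a multi-machine job only retrieves the logs that belong to each host.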
diff --git a/server/server_job.py b/server/server_job.py
index 98e4ed19..05bd19fb 100755
--- a/server/server_job.py
+++ b/server/server_job.py
@@ -104,9 +104,10 @@ class base_server_job(object):
if resultdir:
if not os.path.exists(resultdir):
os.mkdir(resultdir)
- log_file = open(self.uncollected_log_file, "w")
- pickle.dump([], log_file)
- log_file.close()
+ if not os.path.exists(self.uncollected_log_file):
+ log_file = open(self.uncollected_log_file, "w")
+ pickle.dump([], log_file)
+ log_file.close()
if not os.path.exists(self.debugdir):
os.mkdir(self.debugdir)
status_log = self.get_status_log_path()
@@ -360,7 +361,7 @@ class base_server_job(object):
USE_TEMP_DIR = object()
def run(self, cleanup=False, install_before=False, install_after=False,
collect_crashdumps=True, namespace={}, control=None,
- control_file_dir=None):
+ control_file_dir=None, only_collect_crashinfo=False):
# use a copy so changes don't affect the original dictionary
namespace = namespace.copy()
machines = self.machines
@@ -390,27 +391,31 @@ class base_server_job(object):
if install_before and machines:
self._execute_code(INSTALL_CONTROL_FILE, namespace)
- # determine the dir to write the control files to
- if control_file_dir and control_file_dir is not self.USE_TEMP_DIR:
- temp_control_file_dir = None
- else:
- temp_control_file_dir = control_file_dir = tempfile.mkdtemp(
- suffix='temp_control_file_dir')
- server_control_file = os.path.join(control_file_dir,
- SERVER_CONTROL_FILENAME)
- client_control_file = os.path.join(control_file_dir,
- CLIENT_CONTROL_FILENAME)
- if self.client:
- namespace['control'] = control
- utils.open_write_close(client_control_file, control)
- shutil.copy(CLIENT_WRAPPER_CONTROL_FILE, server_control_file)
- else:
- namespace['utils'] = utils
- utils.open_write_close(server_control_file, control)
- self._execute_code(server_control_file, namespace)
+ if not only_collect_crashinfo:
+ # determine the dir to write the control files to
+ cfd_specified = (control_file_dir
+ and control_file_dir is not self.USE_TEMP_DIR)
+ if cfd_specified:
+ temp_control_file_dir = None
+ else:
+ temp_control_file_dir = tempfile.mkdtemp(
+ suffix='temp_control_file_dir')
+ control_file_dir = temp_control_file_dir
+ server_control_file = os.path.join(control_file_dir,
+ SERVER_CONTROL_FILENAME)
+ client_control_file = os.path.join(control_file_dir,
+ CLIENT_CONTROL_FILENAME)
+ if self.client:
+ namespace['control'] = control
+ utils.open_write_close(client_control_file, control)
+ shutil.copy(CLIENT_WRAPPER_CONTROL_FILE,
+ server_control_file)
+ else:
+ utils.open_write_close(server_control_file, control)
+ self._execute_code(server_control_file, namespace)
- # disable crashinfo collection if we get this far without error
- collect_crashinfo = False
+ # no error occurred, so we don't need to collect crashinfo
+ collect_crashinfo = False
finally:
if temp_control_file_dir:
# Clean up temp directory used for copies of the control files
diff --git a/server/server_job_unittest.py b/server/server_job_unittest.py
index 4411b7cf..a553d6a3 100644
--- a/server/server_job_unittest.py
+++ b/server/server_job_unittest.py
@@ -59,6 +59,7 @@ class CopyLogsTest(unittest.TestCase):
os.path.exists.expect_call(
mock.is_string_comparator()).and_return(False)
os.mkdir.expect_call(mock.is_string_comparator())
+ os.path.exists.expect_call(self.uncollected).and_return(False)
server_job.open.expect_call(self.uncollected, 'w').and_return(file_obj)
pickle.dump.expect_call([], file_obj)
file_obj.close.expect_call()