diff options
Diffstat (limited to '.gitlab-ci/bare-metal/cros_servo_run.py')
-rwxr-xr-x | .gitlab-ci/bare-metal/cros_servo_run.py | 163 |
1 files changed, 75 insertions, 88 deletions
diff --git a/.gitlab-ci/bare-metal/cros_servo_run.py b/.gitlab-ci/bare-metal/cros_servo_run.py index ecb6c460fcb..02a91edc385 100755 --- a/.gitlab-ci/bare-metal/cros_servo_run.py +++ b/.gitlab-ci/bare-metal/cros_servo_run.py @@ -1,76 +1,30 @@ - #!/usr/bin/env python3 # # Copyright © 2020 Google LLC -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. +# SPDX-License-Identifier: MIT import argparse -import queue import re -from serial_buffer import SerialBuffer import sys -import threading +from custom_logger import CustomLogger +from serial_buffer import SerialBuffer -class CrosServoRun: - def __init__(self, cpu, ec): - # Merged FIFO for the two serial buffers, fed by threads. - self.serial_queue = queue.Queue() - self.sentinel = object() - self.threads_done = 0 - self.ec_ser = SerialBuffer( - ec, "results/serial-ec.txt", "R SERIAL-EC> ") +class CrosServoRun: + def __init__(self, cpu, ec, test_timeout, logger): self.cpu_ser = SerialBuffer( cpu, "results/serial.txt", "R SERIAL-CPU> ") + # Merge the EC serial into the cpu_ser's line stream so that we can + # effectively poll on both at the same time and not have to worry about + self.ec_ser = SerialBuffer( + ec, "results/serial-ec.txt", "R SERIAL-EC> ", line_queue=self.cpu_ser.line_queue) + self.test_timeout = test_timeout + self.logger = logger - self.iter_feed_ec = threading.Thread( - target=self.iter_feed_queue, daemon=True, args=(self.ec_ser.lines(),)) - self.iter_feed_ec.start() - - self.iter_feed_cpu = threading.Thread( - target=self.iter_feed_queue, daemon=True, args=(self.cpu_ser.lines(),)) - self.iter_feed_cpu.start() - - # Feed lines from our serial queues into the merged queue, marking when our - # input is done. - def iter_feed_queue(self, it): - for i in it: - self.serial_queue.put(i) - self.serial_queue.put(sentinel) - - # Return the next line from the queue, counting how many threads have - # terminated and joining when done - def get_serial_queue_line(self): - line = self.serial_queue.get() - if line == self.sentinel: - self.threads_done = self.threads_done + 1 - if self.threads_done == 2: - self.iter_feed_cpu.join() - self.iter_feed_ec.join() - return line - - # Returns an iterator for getting the next line. - def serial_queue_lines(self): - return iter(self.get_serial_queue_line, self.sentinel) + def close(self): + self.ec_ser.close() + self.cpu_ser.close() def ec_write(self, s): print("W SERIAL-EC> %s" % s) @@ -84,53 +38,71 @@ class CrosServoRun: RED = '\033[0;31m' NO_COLOR = '\033[0m' print(RED + message + NO_COLOR) + self.logger.update_status_fail(message) def run(self): # Flush any partial commands in the EC's prompt, then ask for a reboot. self.ec_write("\n") self.ec_write("reboot\n") + bootloader_done = False + self.logger.create_job_phase("boot") + tftp_failures = 0 # This is emitted right when the bootloader pauses to check for input. # Emit a ^N character to request network boot, because we don't have a # direct-to-netboot firmware on cheza. - for line in self.serial_queue_lines(): + for line in self.cpu_ser.lines(timeout=120, phase="bootloader"): if re.search("load_archive: loading locale_en.bin", line): self.cpu_write("\016") + bootloader_done = True + break + + # The Cheza firmware seems to occasionally get stuck looping in + # this error state during TFTP booting, possibly based on amount of + # network traffic around it, but it'll usually recover after a + # reboot. Currently mostly visible on google-freedreno-cheza-14. + if re.search("R8152: Bulk read error 0xffffffbf", line): + tftp_failures += 1 + if tftp_failures >= 10: + self.print_error( + "Detected intermittent tftp failure, restarting run.") + return 1 + + # If the board has a netboot firmware and we made it to booting the + # kernel, proceed to processing of the test run. + if re.search("Booting Linux", line): + bootloader_done = True break # The Cheza boards have issues with failing to bring up power to # the system sometimes, possibly dependent on ambient temperature # in the farm. if re.search("POWER_GOOD not seen in time", line): - self.print_error("Detected intermittent poweron failure, restarting run...") - return 2 + self.print_error( + "Detected intermittent poweron failure, abandoning run.") + return 1 - tftp_failures = 0 - for line in self.serial_queue_lines(): + if not bootloader_done: + self.print_error("Failed to make it through bootloader, abandoning run.") + return 1 + + self.logger.create_job_phase("test") + for line in self.cpu_ser.lines(timeout=self.test_timeout, phase="test"): if re.search("---. end Kernel panic", line): return 1 - # The Cheza firmware seems to occasionally get stuck looping in - # this error state during TFTP booting, possibly based on amount of - # network traffic around it, but it'll usually recover after a - # reboot. - if re.search("R8152: Bulk read error 0xffffffbf", line): - tftp_failures += 1 - if tftp_failures >= 100: - self.print_error("Detected intermittent tftp failure, restarting run...") - return 2 - # There are very infrequent bus errors during power management transitions # on cheza, which we don't expect to be the case on future boards. if re.search("Kernel panic - not syncing: Asynchronous SError Interrupt", line): - self.print_error("Detected cheza power management bus error, restarting run...") - return 2 + self.print_error( + "Detected cheza power management bus error, abandoning run.") + return 1 # If the network device dies, it's probably not graphics's fault, just try again. if re.search("NETDEV WATCHDOG", line): self.print_error( - "Detected network device failure, restarting run...") - return 2 + "Detected network device failure, abandoning run.") + return 1 # These HFI response errors started appearing with the introduction # of piglit runs. CosmicPenguin says: @@ -142,17 +114,30 @@ class CrosServoRun: # Given that it seems to trigger randomly near a GPU fault and then # break many tests after that, just restart the whole run. if re.search("a6xx_hfi_send_msg.*Unexpected message id .* on the response queue", line): - self.print_error("Detected cheza power management bus error, restarting run...") - return 2 + self.print_error( + "Detected cheza power management bus error, abandoning run.") + return 1 + + if re.search("coreboot.*bootblock starting", line): + self.print_error( + "Detected spontaneous reboot, abandoning run.") + return 1 + + if re.search("arm-smmu 5040000.iommu: TLB sync timed out -- SMMU may be deadlocked", line): + self.print_error("Detected cheza MMU fail, abandoning run.") + return 1 result = re.search("hwci: mesa: (\S*)", line) if result: if result.group(1) == "pass": + self.logger.update_dut_job("status", "pass") return 0 else: + self.logger.update_status_fail("test fail") return 1 - self.print_error("Reached the end of the CPU serial log without finding a result") + self.print_error( + "Reached the end of the CPU serial log without finding a result") return 1 @@ -162,17 +147,19 @@ def main(): help='CPU Serial device', required=True) parser.add_argument( '--ec', type=str, help='EC Serial device', required=True) + parser.add_argument( + '--test-timeout', type=int, help='Test phase timeout (minutes)', required=True) args = parser.parse_args() - servo = CrosServoRun(args.cpu, args.ec) - - while True: - retval = servo.run() - if retval != 2: - break + logger = CustomLogger("job_detail.json") + logger.update_dut_time("start", None) + servo = CrosServoRun(args.cpu, args.ec, args.test_timeout * 60, logger) + retval = servo.run() # power down the CPU on the device servo.ec_write("power off\n") + logger.update_dut_time("end", None) + servo.close() sys.exit(retval) |