410 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			410 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
| # Lint as: python2, python3
 | |
| """This class defines the Remote host class."""
 | |
| 
 | |
| from __future__ import absolute_import
 | |
| from __future__ import division
 | |
| from __future__ import print_function
 | |
| import os, logging, time
 | |
| import six
 | |
| from six.moves import urllib
 | |
| import re
 | |
| from autotest_lib.client.common_lib import error
 | |
| from autotest_lib.server import utils
 | |
| from autotest_lib.server.hosts import base_classes
 | |
| 
 | |
| 
 | |
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    # Default reboot timeout, taken unchanged from the parent Host class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time, in seconds, that halt() waits for the host to go down.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked by get_labels() with the host as sole argument; each
    # returns a label string, a list of labels, or a false value.
    _LABEL_FUNCTIONS = []
    # Not referenced in this class; presumably consumed by subclasses or by
    # label tooling -- TODO confirm.
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # mktemp(1) template used by get_tmp_dir() and delete_all_tmp_dirs().
    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Set up the state specific to a remote host.

        @param hostname  Name of the remote machine.
        @param autodir  Autotest directory on the host, if already known.
        @param args  Positional arguments forwarded to the parent class.
        @param dargs  Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Directories handed out by get_tmp_dir(); removed again in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host: run the parent close(), stop the loggers and remove
        every temporary directory still tracked in self.tmp_dirs.

        Removal is best effort; a failing 'rm' is logged and otherwise
        ignored.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only created by _initialize(), so tolerate its absence.
        for tmp_dir in getattr(self, 'tmp_dirs', []):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError as e:
                # Cleanup must not turn close() into a failure.
                logging.debug('Failed to remove temporary directory %s: %s',
                              tmp_dir, e)


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).

        This implementation snapshots /var/log/messages (when it exists) to
        VAR_LOG_MESSAGES_COPY_PATH on the remote host.
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error: the snapshot is a convenience only, so any
            # failure is deliberately logged and swallowed.
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """
        Return the autotest directory recorded for this host, or None if it
        has not been set.
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.

        @param autodir  The autotest installation directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Ask the remote kernel to reboot by writing 'b' to
        /proc/sysrq-trigger. The command is started in the background.
        """
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B.  This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout  Maximum time to wait for host down, in seconds.
        @param wait  Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                wait - Should we wait to see if the machine comes back up.
                       If this is set to True, an error from reboot_cmd is
                       ignored (provided the boot id could be read first).
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute. When empty, a default
                        sequence of increasingly forceful commands is used.
                dargs - Extra keyword arguments, forwarded to reboot_setup()
                        and, when waiting, to wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            # Escalate from a plain reboot to forced variants, giving each
            # attempt some time to take effect before trying the next one.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "reboot.start")
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to avoid,
                # because we do not have much control over the remote machine
                # after "reboot" starts.
                if not wait or current_boot_id is None:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend in integer seconds.
                suspend_cmd - suspend command to execute.
                allow_early_resume - Boolean that indicates whether resume
                                     before |timeout| is ok.
        Raises:
                error.AutoservSuspendError - If the suspend command fails, if
                                             the host does not answer pings
                                             after the wait, or if
                                             |allow_early_resume| is False
                                             and the device resumes before
                                             |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            # Allow up to 60 seconds beyond |timeout| for the host to return.
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll every 5 seconds once the expected resume time is
                    # close, so the resume is noticed promptly.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            # One last attempt before declaring the host lost.
            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if (lasted < timeout and not allow_early_resume):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot_followup() and, when a job is attached, let
        the job's profilers handle the reboot.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout  Passed through to the parent implementation.
        @param dargs  Extra keyword arguments for the parent implementation.
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """
        Run the parent cleanup() and then reboot the host with the default
        reboot() arguments.
        """
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.

        @param parent  Directory joined in front of TMP_DIR_TEMPLATE. Note
                       that os.path.join() discards it when the template is
                       an absolute path.
        @return The path of the newly created directory.
        """
        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
        # Create the directory that mktemp will really use: with an absolute
        # TMP_DIR_TEMPLATE that is not |parent|, and mktemp fails if the
        # containing directory is missing.
        template_parent = os.path.dirname(template)
        cmd = 'mkdir -p %s && mktemp -d %s' % (template_parent, template)
        dir_name = self.run(cmd).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Read this host's keyvals from the job result directory.

        Callers must check that self.job is set before calling.

        @return Whatever utils.read_keyval() returns for
                <resultdir>/host_keyvals/<hostname>.
        """
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        if not self.job:
            return None
        return self._read_host_keyvals().get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        Labels are stored as a comma separated, URL-quoted string in the
        host keyvals; each entry is unquoted before being returned.
        """
        if not self.job:
            return []
        all_labels = self._read_host_keyvals().get('labels', '')
        if not all_labels:
            return []
        return [urllib.parse.unquote(label)
                for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # The directory may already be untracked (deleted twice, or dropped
        # by delete_all_tmp_dirs()); that must not raise ValueError.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def delete_all_tmp_dirs(self, parent='/tmp'):
        """
        Delete all directories in parent that were created by get_tmp_dir

        Note that this may involve deleting directories created by calls to
        get_tmp_dir on a different RemoteHost instance than the one running this
        method. Only perform this operation when certain that this will not
        cause unexpected behavior.

        @param parent  A parent directory, or a list/tuple of them. As in
                       get_tmp_dir(), os.path.join() discards it when
                       TMP_DIR_TEMPLATE is an absolute path.
        """
        if isinstance(parent, (list, tuple)):
            parents = parent
        else:
            parents = [parent]

        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
        glob_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE)
        # distinguish between non-wildcard asterisks in parent directory name
        # and wildcards inserted from the template: every literal piece is
        # double-quoted, the wildcards stay outside the quotes
        quoted_glob = '*'.join(
            ['"%s"' % utils.sh_escape(x) for x in glob_template.split('*')])
        # the same template as a regex, each run of Xs becoming a character
        # class of matching length
        regex_template = re.sub(
            '(XXXX*)',
            lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
            self.TMP_DIR_TEMPLATE)

        rm_paths = []
        for parent_dir in parents:
            # quoted_glob's opening quote is dropped and re-added in front of
            # the parent so that parent and first literal piece share quotes
            rm_paths.append('"%s' % os.path.join(utils.sh_escape(parent_dir),
                                                 quoted_glob[1:]))
            # remove deleted directories from tmp_dirs
            regex = os.path.join(parent_dir, regex_template)
            regex += '(/|$)' # remove if matches, or is within a dir that matches
            self.tmp_dirs = [x for x in self.tmp_dirs
                             if not re.match(regex, x)]

        self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        @return The first field of /proc/uptime, as a string.
        @raises error.AutoservHostError if the host does not appear to be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.

        The check is whether /sys/kernel/debug/provoke-crash/DIRECT can be
        listed on the remote host.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running, or if no
        waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True # wait up processes aren't being used
        for procname in processes:
            # 'ps -e' is tried first, with plain 'ps' as the fallback.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function that raises is logged and skipped. Results that are
        neither a string nor a list are ignored.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if not label:
                continue
            # isinstance rather than an exact type() match, so that str/list
            # subclasses and Python 2 unicode labels are not silently dropped.
            if isinstance(label, six.string_types):
                labels.append(label)
            elif isinstance(label, list):
                labels.extend(label)
        return labels


    def get_result_dir(self):
        """Return the result directory path if passed or None if not.

        @return string
        """
        if self.job and hasattr(self.job, 'resultdir'):
            return self.job.resultdir
        return None
 |