174 lines
		
	
	
		
			6.7 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			174 lines
		
	
	
		
			6.7 KiB
		
	
	
	
		
			Python
		
	
	
	
| # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 | |
| # Use of this source code is governed by a BSD-style license that can be
 | |
| # found in the LICENSE file.
 | |
| 
 | |
| import logging
 | |
| import math
 | |
| import threading
 | |
| 
 | |
| import common
 | |
| from autotest_lib.client.common_lib import env
 | |
| from autotest_lib.client.common_lib import error
 | |
| from autotest_lib.client.common_lib import utils
 | |
| from autotest_lib.client.common_lib.cros import retry
 | |
| from autotest_lib.frontend.afe.json_rpc import proxy
 | |
| from autotest_lib.server import frontend
 | |
# chromite is an optional dependency at import time. When it is missing the
# retry helpers are set to None; RetryingAFE.run checks retry_util for None
# and raises ImportError at call time instead.
try:
    from chromite.lib import retry_util
    from chromite.lib import timeout_util
except ImportError:
    logging.warning('Unable to import chromite.')
    retry_util = None
    timeout_util = None

# Fall back to the mock metrics object from common_lib.utils when the chromite
# metrics module is unavailable.
try:
    from chromite.lib import metrics
except ImportError:
    logging.warning('Unable to import metrics from chromite.')
    metrics = utils.metrics_mock
 | |
| 
 | |
| 
 | |
def convert_timeout_to_retry(backoff, timeout_min, delay_sec):
    """Compute the number of retry attempts for use with chromite.retry_util.

    The result is the smallest whole number of attempts whose accumulated
    sleep time, delay_sec * sum(backoff^r for r in 0..max_retry-1), reaches
    timeout_min minutes.

    @param backoff: The exponential backoff factor. A factor of 1 means a
                    constant delay between attempts.
    @param timeout_min: The maximum amount of time (in minutes) to sleep.
    @param delay_sec: The amount to sleep (in seconds) between each attempt.
                      Must be non-zero.

    @return: The number of retry attempts in the case of exponential backoff.
    """
    # Estimate the max_retry in the case of exponential backoff:
    # => total_sleep = sleep*sum(r=0..max_retry-1, backoff^r)
    # => total_sleep = sleep( (1-backoff^max_retry) / (1-backoff) )
    # => max_retry*ln(backoff) = ln(1-(total_sleep/sleep)*(1-backoff))
    # => max_retry = ln(1-(total_sleep/sleep)*(1-backoff))/ln(backoff)
    total_sleep = timeout_min * 60
    # Force true division: with integer arguments a plain '/' floors the
    # ratio on Python 2, which under-estimates the sleep budget.
    sleep_ratio = float(total_sleep) / delay_sec
    if backoff == 1:
        # With a constant delay the series is total_sleep = sleep*max_retry.
        # The closed form above would divide by ln(1) == 0 here.
        return int(math.ceil(sleep_ratio))
    numerator = math.log10(1 - sleep_ratio * (1 - backoff))
    denominator = math.log10(backoff)
    return int(math.ceil(numerator / denominator))
 | |
| 
 | |
| 
 | |
class RetryingAFE(frontend.AFE):
    """A frontend.AFE whose RPCs are retried when they fail.

    The overall retry timeout and the delay between attempts are both
    configurable per instance.
    """
    def __init__(self, timeout_min=30, delay_sec=10, **dargs):
        """Record the retry settings, then initialize the underlying AFE.

        @param timeout_min: timeout in minutes until giving up.
        @param delay_sec: pre-jittered delay between retries in seconds.
        @param dargs: keyword arguments passed through to frontend.AFE.
        """
        self.timeout_min, self.delay_sec = timeout_min, delay_sec
        super(RetryingAFE, self).__init__(**dargs)


    def set_timeout(self, timeout_min):
        """Change how long RPCs to the AFE server are retried.

        @param timeout_min: The timeout minutes for AFE server.
        """
        self.timeout_min = timeout_min


    def run(self, call, **dargs):
        """Run an RPC call, retrying it on retriable failures.

        On the main thread the call goes through retry_util.GenericRetry,
        wrapped in timeout_util.Timeout unless running under mod_wsgi. On any
        other thread it goes through the retry.retry decorator instead.

        @param call: A string RPC call.
        @param dargs: the parameters of the RPC call.

        @return: The value returned by frontend.AFE.run for this call.
        """
        if retry_util is None:
            raise ImportError('Unable to import chromite. Please consider '
                              'running build_externals to build site packages.')

        # Exceptions of these types are raised immediately; any other
        # Exception triggers a retry.
        no_retry_excs = (ImportError, error.RPCException,
                         proxy.JSONRPCException, timeout_util.TimeoutError,
                         error.ControlFileNotFound)
        backoff = 2
        max_retry = convert_timeout_to_retry(backoff, self.timeout_min,
                                             self.delay_sec)

        def _run(self, call, **dargs):
            return super(RetryingAFE, self).run(call, **dargs)

        def handler(exc):
            """Decide whether a failed attempt should be retried.

            @param exc: An exception.

            @return: True if exc is an Exception that is not one of the
                     immediately-raised types. False otherwise.
            """
            if isinstance(exc, no_retry_excs):
                return False
            return isinstance(exc, Exception)

        # If the call is not in main thread, signal can't be used to abort the
        # call. In that case, use a basic retry which does not enforce timeout
        # if the process hangs.
        @retry.retry(Exception, timeout_min=self.timeout_min,
                     delay_sec=self.delay_sec,
                     raiselist=[ImportError, error.RPCException,
                                proxy.ValidationError])
        def _run_in_child_thread(self, call, **dargs):
            return super(RetryingAFE, self).run(call, **dargs)

        on_main_thread = isinstance(threading.current_thread(),
                                    threading._MainThread)
        if not on_main_thread:
            return _run_in_child_thread(self, call, **dargs)

        # Add the keyword arguments for GenericRetry on top of the RPC's own
        # parameters.
        retry_kwargs = dict(dargs, sleep=self.delay_sec,
                            backoff_factor=backoff)

        def _retrying_call():
            return retry_util.GenericRetry(handler, max_retry, _run,
                                           self, call, **retry_kwargs)

        # timeout_util.Timeout fundamentally relies on sigalrm, and doesn't
        # work at all in wsgi environment (just emits logs spam). So, don't
        # use it in wsgi.
        try:
            if env.IN_MOD_WSGI:
                return _retrying_call()
            with timeout_util.Timeout(self.timeout_min * 60):
                return _retrying_call()
        except timeout_util.TimeoutError:
            timeout_counter = metrics.Counter(
                    'chromeos/autotest/retrying_afe/retry_timeout')
            # Reserve field job_details for future use.
            metric_fields = {
                'destination_server': self.server.split(':')[0],
                'call': call,
                'job_details': '',
            }
            timeout_counter.increment(fields=metric_fields)
            raise
 | |
| 
 | |
| 
 | |
class RetryingTKO(frontend.TKO):
    """A frontend.TKO whose RPCs are retried when they fail.

    The overall retry timeout and the delay between attempts are both
    configurable per instance.
    """
    def __init__(self, timeout_min=30, delay_sec=10, **dargs):
        """Record the retry settings, then initialize the underlying TKO.

        @param timeout_min: timeout in minutes until giving up.
        @param delay_sec: pre-jittered delay between retries in seconds.
        @param dargs: keyword arguments passed through to frontend.TKO.
        """
        self.timeout_min, self.delay_sec = timeout_min, delay_sec
        super(RetryingTKO, self).__init__(**dargs)


    def run(self, call, **dargs):
        """Run an RPC call through the retry.retry decorator.

        @param call: A string RPC call.
        @param dargs: the parameters of the RPC call.

        @return: The value returned by frontend.TKO.run for this call.
        """
        # Build the retry wrapper from this instance's current settings;
        # exceptions in raiselist are handed to retry.retry as non-retriable.
        with_retries = retry.retry(Exception, timeout_min=self.timeout_min,
                                   delay_sec=self.delay_sec,
                                   raiselist=[ImportError, error.RPCException,
                                              proxy.ValidationError])

        def _run(self, call, **dargs):
            return super(RetryingTKO, self).run(call, **dargs)

        retrying_run = with_retries(_run)
        return retrying_run(self, call, **dargs)
 |