356 lines
13 KiB
Python
Executable File
356 lines
13 KiB
Python
Executable File
#!/usr/bin/python2
|
|
|
|
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""Module used to back up the mysql db and upload to Google Storage.
|
|
|
|
Usage:
|
|
backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10
|
|
|
|
gs_bucket may refer to a local location by omitting gs:// and giving a local
|
|
path if desired for testing. The example usage above creates a dump
|
|
of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
|
|
cleans up older dumps if there are more than 10 in that directory.
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import datetime
|
|
from distutils import version
|
|
import logging
|
|
import optparse
|
|
import os
|
|
import tempfile
|
|
|
|
import common
|
|
|
|
from autotest_lib.client.common_lib import error
|
|
from autotest_lib.client.common_lib import global_config
|
|
from autotest_lib.client.common_lib import utils
|
|
|
|
from chromite.lib import metrics
|
|
from chromite.lib import ts_mon_config
|
|
from six.moves import range
|
|
|
|
_ATTEMPTS = 3
|
|
_GSUTIL_BIN = 'gsutil'
|
|
_GS_BUCKET = 'gs://chromeos-lab/backup/database'
|
|
# TODO(scottz): Should we need to ignore more than one database a general
|
|
# function should be designed that lists tables in the database and properly
|
|
# creates the --ignore-table= args to be passed to mysqldump.
|
|
# Tables to ignore when dumping all databases.
|
|
# performance_schema is an internal database that cannot be dumped
|
|
IGNORE_TABLES = ['performance_schema.cond_instances',
|
|
'performance_schema.events_waits_current',
|
|
'performance_schema.cond_instances',
|
|
'performance_schema.events_waits_history',
|
|
'performance_schema.events_waits_history_long',
|
|
'performance_schema.events_waits_summary_by_instance',
|
|
('performance_schema.'
|
|
'events_waits_summary_by_thread_by_event_name'),
|
|
'performance_schema.events_waits_summary_global_by_event_name',
|
|
'performance_schema.file_instances',
|
|
'performance_schema.file_summary_by_event_name',
|
|
'performance_schema.file_summary_by_instance',
|
|
'performance_schema.mutex_instances',
|
|
'performance_schema.performance_timers',
|
|
'performance_schema.rwlock_instances',
|
|
'performance_schema.setup_consumers',
|
|
'performance_schema.setup_instruments',
|
|
'performance_schema.setup_timers',
|
|
'performance_schema.threads']
|
|
|
|
# Conventional mysqldump schedules.
|
|
_DAILY = 'daily'
|
|
_WEEKLY = 'weekly'
|
|
_MONTHLY = 'monthly'
|
|
|
|
# Dump of server db only
|
|
_SERVER_DB = 'server_db'
|
|
|
|
# Contrary to a conventional mysql dump which takes O(hours) on large databases,
|
|
# a host dump is the cheapest form of backup possible. We dump the output of a
|
|
# of a mysql command showing all hosts and their pool labels to a text file that
|
|
# is backed up to google storage.
|
|
_ONLY_HOSTS = 'only_hosts'
|
|
_ONLY_SHARDS = 'only_shards'
|
|
_SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS,
|
|
_DAILY, _WEEKLY, _MONTHLY]
|
|
|
|
class BackupError(Exception):
|
|
"""Raised for error occurred during backup."""
|
|
|
|
|
|
class MySqlArchiver(object):
|
|
"""Class that archives the Autotest MySQL DB to Google Storage.
|
|
|
|
Vars:
|
|
gs_dir: The path to the directory in Google Storage that this dump file
|
|
will be uploaded to.
|
|
number_to_keep: The number of dumps we should store.
|
|
"""
|
|
_AUTOTEST_DB = "chromeos_autotest_db"
|
|
_SERVER_DB = "chromeos_lab_servers"
|
|
|
|
|
|
def __init__(self, scheduled_type, number_to_keep, gs_bucket):
|
|
# For conventional scheduled type, we back up all databases.
|
|
# self._db is only used when scheduled_type is not
|
|
# conventional scheduled type.
|
|
self._db = self._get_db_name(scheduled_type)
|
|
self._gs_dir = '/'.join([gs_bucket, scheduled_type])
|
|
self._number_to_keep = number_to_keep
|
|
self._type = scheduled_type
|
|
|
|
|
|
@classmethod
|
|
def _get_db_name(cls, scheduled_type):
|
|
"""Get the db name to backup.
|
|
|
|
@param scheduled_type: one of _SCHEDULER_TYPES.
|
|
|
|
@returns: The name of the db to backup.
|
|
Or None for backup all dbs.
|
|
"""
|
|
if scheduled_type == _SERVER_DB:
|
|
return cls._SERVER_DB
|
|
elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]:
|
|
return cls._AUTOTEST_DB
|
|
else:
|
|
return None
|
|
|
|
@staticmethod
|
|
def _get_user_pass():
|
|
"""Returns a tuple containing the user/pass to use to access the DB."""
|
|
user = global_config.global_config.get_config_value(
|
|
'CROS', 'db_backup_user')
|
|
password = global_config.global_config.get_config_value(
|
|
'CROS', 'db_backup_password')
|
|
return user, password
|
|
|
|
|
|
def create_mysql_dump(self):
|
|
"""Returns the path to a mysql dump of the current autotest DB."""
|
|
user, password = self._get_user_pass()
|
|
_, filename = tempfile.mkstemp('autotest_db_dump')
|
|
logging.debug('Dumping mysql database to file %s', filename)
|
|
extra_dump_args = ''
|
|
for entry in IGNORE_TABLES:
|
|
extra_dump_args += '--ignore-table=%s ' % entry
|
|
if self._type in [_WEEKLY, _MONTHLY]:
|
|
extra_dump_args += '--dump-slave '
|
|
|
|
if not self._db:
|
|
extra_dump_args += "--all-databases"
|
|
db_name = self._db or ''
|
|
utils.system('set -o pipefail; mysqldump --user=%s '
|
|
'--password=%s %s %s| gzip - > %s' % (
|
|
user, password, extra_dump_args, db_name, filename))
|
|
return filename
|
|
|
|
|
|
def _create_dump_from_query(self, query):
|
|
"""Dumps result of a query into a text file.
|
|
|
|
@param query: Query to execute.
|
|
|
|
@return: The path to a tempfile containing the response of the query.
|
|
"""
|
|
if not self._db:
|
|
raise BackupError("_create_dump_from_query requires a specific db.")
|
|
parameters = {'db': self._db, 'query': query}
|
|
parameters['user'], parameters['password'] = self._get_user_pass()
|
|
_, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
|
|
utils.system(
|
|
'set -o pipefail; mysql -u %(user)s -p%(password)s '
|
|
'%(db)s -e "%(query)s" > %(filename)s' %
|
|
parameters)
|
|
return parameters['filename']
|
|
|
|
|
|
def create_host_dump(self):
|
|
"""Dumps hosts and their labels into a text file.
|
|
|
|
@return: The path to a tempfile containing a dump of
|
|
hosts and their pool labels.
|
|
"""
|
|
respect_static_labels = global_config.global_config.get_config_value(
|
|
'SKYLAB', 'respect_static_labels', type=bool, default=False)
|
|
template = ('SELECT hosts.hostname, labels.name FROM afe_hosts AS '
|
|
'hosts JOIN %(hosts_labels_table)s AS hlt ON '
|
|
'hosts.id = hlt.host_id '
|
|
'JOIN %(labels_table)s AS labels '
|
|
'ON labels.id = hlt.%(column)s '
|
|
'WHERE labels.name LIKE \'%%pool%%\';')
|
|
if respect_static_labels:
|
|
# HACK: We're not checking the replaced_by_static_label on the
|
|
# pool label and just hard coding the fact that pool labels are
|
|
# indeed static labels. Expedience.
|
|
query = template % {
|
|
'hosts_labels_table': 'afe_static_hosts_labels',
|
|
'labels_table': 'afe_static_labels',
|
|
'column': 'staticlabel_id',
|
|
}
|
|
else:
|
|
query = template % {
|
|
'hosts_labels_table': 'afe_hosts_labels',
|
|
'labels_table': 'afe_labels',
|
|
'column': 'label_id',
|
|
}
|
|
return self._create_dump_from_query(query)
|
|
|
|
|
|
def create_shards_dump(self):
|
|
"""Dumps shards and their labels into a text file.
|
|
|
|
@return: The path to a tempfile containing a dump of
|
|
shards and their labels.
|
|
"""
|
|
query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
|
|
'JOIN afe_shards_labels '
|
|
'ON shards.id = afe_shards_labels.shard_id '
|
|
'JOIN afe_labels AS labels '
|
|
'ON labels.id = afe_shards_labels.label_id;')
|
|
return self._create_dump_from_query(query)
|
|
|
|
|
|
def dump(self):
|
|
"""Creates a data dump based on the type of schedule.
|
|
|
|
@return: The path to a file containing the dump.
|
|
"""
|
|
if self._type == _ONLY_HOSTS:
|
|
return self.create_host_dump()
|
|
if self._type == _ONLY_SHARDS:
|
|
return self.create_shards_dump()
|
|
return self.create_mysql_dump()
|
|
|
|
|
|
def _get_name(self):
|
|
"""Returns the name of the dump as presented to google storage."""
|
|
if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
|
|
file_type = 'txt'
|
|
else:
|
|
file_type = 'gz'
|
|
return 'autotest-dump.%s.%s' % (
|
|
datetime.datetime.now().strftime('%y.%m.%d'), file_type)
|
|
|
|
|
|
@staticmethod
|
|
def _retry_run(cmd):
|
|
"""Run the specified |cmd| string, retrying if necessary.
|
|
|
|
Args:
|
|
cmd: The command to run.
|
|
"""
|
|
for attempt in range(_ATTEMPTS):
|
|
try:
|
|
return utils.system_output(cmd)
|
|
except error.CmdError:
|
|
if attempt == _ATTEMPTS - 1:
|
|
raise
|
|
else:
|
|
logging.error('Failed to run %r', cmd)
|
|
|
|
|
|
def upload_to_google_storage(self, dump_file):
|
|
"""Uploads the given |dump_file| to Google Storage.
|
|
|
|
@param dump_file: The path to the file containing the dump.
|
|
"""
|
|
cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
|
|
input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
|
|
name=self._get_name(), gs_dir=self._gs_dir)
|
|
cmd = cmd % input_dict
|
|
logging.debug('Uploading mysql dump to google storage')
|
|
self._retry_run(cmd)
|
|
os.remove(dump_file)
|
|
|
|
|
|
def _get_gs_command(self, cmd):
|
|
"""Returns an array representing the command for rm or ls."""
|
|
# Helpful code to allow us to test without gs.
|
|
assert cmd in ['rm', 'ls']
|
|
gs_bin = _GSUTIL_BIN
|
|
if self._gs_dir.startswith('gs://'):
|
|
cmd_array = [gs_bin, cmd]
|
|
else:
|
|
cmd_array = [cmd]
|
|
|
|
return cmd_array
|
|
|
|
|
|
def _do_ls(self):
|
|
"""Returns the output of running ls on the gs bucket."""
|
|
cmd = self._get_gs_command('ls') + [self._gs_dir]
|
|
return self._retry_run(' '.join(cmd))
|
|
|
|
|
|
def cleanup(self):
|
|
"""Cleans up the gs bucket to ensure we don't over archive."""
|
|
logging.debug('Cleaning up previously archived dump files.')
|
|
listing = self._do_ls()
|
|
ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
|
|
if len(ordered_listing) < self._number_to_keep:
|
|
logging.debug('Cleanup found nothing to do.')
|
|
return
|
|
|
|
to_remove = ordered_listing[:-self._number_to_keep]
|
|
rm_cmd = self._get_gs_command('rm')
|
|
for artifact in to_remove:
|
|
cmd = ' '.join(rm_cmd + [artifact])
|
|
self._retry_run(cmd)
|
|
|
|
|
|
def parse_options():
|
|
"""Parses given options."""
|
|
parser = optparse.OptionParser()
|
|
parser.add_option('--gs_bucket', default=_GS_BUCKET,
|
|
help='Google storage bucket to store mysql db dumps.')
|
|
parser.add_option('--keep', default=10, type=int,
|
|
help='Number of dumps to keep of specified type.')
|
|
parser.add_option('--type', default=_DAILY,
|
|
help='The type of mysql dump to store.')
|
|
parser.add_option('--verbose', default=False, action='store_true',
|
|
help='Google storage bucket to store mysql db dumps.')
|
|
options = parser.parse_args()[0]
|
|
if options.type not in _SCHEDULER_TYPES:
|
|
parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))
|
|
|
|
return options
|
|
|
|
|
|
def main():
|
|
"""Runs the program."""
|
|
options = parse_options()
|
|
backup_succeeded = False
|
|
|
|
with ts_mon_config.SetupTsMonGlobalState(service_name='mysql_db_backup',
|
|
indirect=True):
|
|
with metrics.SecondsTimer(
|
|
'chromeos/autotest/afe_db/backup/durations',
|
|
fields={'type': options.type}):
|
|
try:
|
|
logging.debug('Start db backup: %s', options.type)
|
|
archiver = MySqlArchiver(
|
|
options.type, options.keep, options.gs_bucket)
|
|
dump_file = archiver.dump()
|
|
logging.debug('Uploading backup: %s', options.type)
|
|
archiver.upload_to_google_storage(dump_file)
|
|
archiver.cleanup()
|
|
logging.debug('Db backup completed: %s', options.type)
|
|
backup_succeeded = True
|
|
finally:
|
|
metrics.Counter(
|
|
'chromeos/autotest/db/db_backup/completed').increment(
|
|
fields={'success': backup_succeeded,
|
|
'type': options.type})
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|