376 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			376 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
#!/usr/bin/env python3
 | 
						|
# -*- coding: utf-8 -*-
 | 
						|
# Copyright 2019 The Chromium OS Authors. All rights reserved.
 | 
						|
# Use of this source code is governed by a BSD-style license that can be
 | 
						|
# found in the LICENSE file.
 | 
						|
 | 
						|
"""Download profdata from different arches, merge them and upload to gs.
 | 
						|
 | 
						|
The script is used for updating the PGO profiles for LLVM. The workflow
 | 
						|
is that the script will download profdata from different PGO builds, merge
 | 
						|
them and then upload it to a gs location that LLVM can access.
 | 
						|
 | 
						|
The simplest way of using this script, is to run:
 | 
						|
    ./merge_profdata_and_upload.py --all_latest_profiles
 | 
						|
which will automatically grab profdata from latest PGO generate builders
 | 
						|
for three different architectures and merge them. LLVM hash is also
 | 
						|
detected automatically from the artifacts.
 | 
						|
 | 
						|
If you want to specify certain llvm hash, run it with:
 | 
						|
    ./merge_profdata_and_upload.py --all_latest_profiles --llvm_hash LLVM_HASH
 | 
						|
Note that hash checking will fail if the llvm hash you provided is not the
 | 
						|
same as those in artifacts, or llvm hash in different artifacts are not the
 | 
						|
same.
 | 
						|
 | 
						|
To only use profiles from buildbucket tasks for PGO generate, run it with:
 | 
						|
    ./merge_profdata_and_upload.py -b amd64/bb_id1 -b arm/bb_id2 ...
 | 
						|
The buildbucket id can be found using `bb ls` command after manually launched
 | 
						|
builder finishes.
 | 
						|
 | 
						|
There is a chance that builders only succeeded partially, in this case, you
 | 
						|
can run this script to merge both profdata from builder scheduled and manually
 | 
						|
launched:
 | 
						|
    ./merge_profdata_and_upload.py -l arm -l amd64 -b arm64/bb_id
 | 
						|
In this example, the script will merge profdata from arm and amd64 builder, and
 | 
						|
profdata from an arm64 buildbucket task.
 | 
						|
"""
 | 
						|
 | 
						|
from __future__ import print_function
 | 
						|
 | 
						|
import argparse
 | 
						|
import collections
 | 
						|
import distutils.spawn
 | 
						|
import json
 | 
						|
import os
 | 
						|
import os.path
 | 
						|
import shutil
 | 
						|
import subprocess
 | 
						|
import sys
 | 
						|
import tempfile
 | 
						|
 | 
						|
_LLVM_PROFDATA = '/usr/bin/llvm-profdata'
 | 
						|
_GS_PREFIX = 'gs://'
 | 
						|
 | 
						|
_LLVMMetadata = collections.namedtuple('_LLVMMetadata', ['head_sha'])
 | 
						|
 | 
						|
 | 
						|
def _fetch_gs_artifact(remote_name, local_name):
  """Copy a single file from a remote gs location to a local path.

  Args:
    remote_name: full gs location to the file.
    local_name: the name of local file to be copied to.
  """
  assert remote_name.startswith(_GS_PREFIX)
  fetch_cmd = ['gsutil', 'cp', remote_name, local_name]
  subprocess.check_call(fetch_cmd)
 | 
						|
 | 
						|
 | 
						|
def _get_gs_profdata(remote_profdata, arch):
  """Fetch and extract profdata from remote gs location.

  Args:
    remote_profdata: remote gs location of the profdata tarball.
    arch: directory named with arch to separate each profdata.

  Returns:
    Local location of the extracted profdata.

  Raises:
    RuntimeError: the tarball did not contain a profdata file.
  """
  tar = 'llvm_profdata.tar.xz'
  _fetch_gs_artifact(remote_profdata, tar)

  # `tar -v` echoes each extracted member name; the tarball is expected to
  # contain exactly one profdata file, so the output is a single line
  # (bytes, since no encoding is given).
  profdata_name = subprocess.check_output(['tar', '-xvf', tar]).strip()
  if b'.llvm.profdata' not in profdata_name:
    raise RuntimeError('No profdata in the tarball: %s' % remote_profdata)

  # Keep each arch's profdata in its own directory so names never collide.
  os.mkdir(arch)
  profdata_loc = os.path.join(arch, 'llvm.profdata')
  os.rename(profdata_name, profdata_loc)
  print('Profdata extracted to: %s' % profdata_loc)
  return profdata_loc
 | 
						|
 | 
						|
 | 
						|
def _get_gs_metadata(remote_metadata):
  """Fetch metadata from remote gs location and read the LLVM head_sha.

  Args:
    remote_metadata: remote gs location of the metadata json file.

  Returns:
    An _LLVMMetadata holding the head_sha read from the file.
  """
  local_json = 'llvm_metadata.json'
  _fetch_gs_artifact(remote_metadata, local_json)

  with open(local_json) as f:
    parsed = json.load(f)

  return _LLVMMetadata(head_sha=parsed['head_sha'])
 | 
						|
 | 
						|
 | 
						|
def _find_latest_artifacts(gs_url, arch):
  """Fetch the latest profdata and metadata from a given gs location.

  Args:
    gs_url: a gs location containing one or more artifacts to fetch.
    arch: the arch profdata collected from.

  Returns:
    A tuple of (metadata, local profdata location).

  Raises:
    RuntimeError: the gs location could not be listed, or contained no
      profdata/metadata artifacts.
  """
  assert gs_url.startswith(_GS_PREFIX)
  try:
    # List all artifacts in the gs location and sort by time (newest first,
    # using the timestamp in the second column of `gsutil ls -l` output).
    output = subprocess.check_output(['gsutil', 'ls', '-l', gs_url],
                                     encoding='utf-8').strip().split('\n')
    lines = sorted(output, key=lambda x: x.split()[1], reverse=True)
  except subprocess.CalledProcessError:
    raise RuntimeError('Artifacts not found: %s' % gs_url)

  # Use a loop to go through all artifacts to find the latest profdata.
  # An example of the output of latest builder bucket:
  # pylint: disable=line-too-long
  #   5006528  2020-05-31T10:08:48Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
  #   56  2020-05-31T10:08:48Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
  #   5005952  2020-05-24T10:53:34Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
  #   56  2020-05-24T10:53:34Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
  # An example for the lines of buildbucket location:
  #   5004260  2020-05-29T09:48:04Z  gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
  #   56  2020-05-29T09:48:04Z  gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
  # pylint: enable=line-too-long
  profdata_url = ''
  # Fix: profile_path was previously unbound when no profdata matched, so
  # the "no profdata" check below raised NameError instead of RuntimeError.
  profile_path = None
  for line in lines:
    url = line.split()[-1]
    if '.llvm.profdata.tar.xz' in url:
      profile_path = _get_gs_profdata(url, arch)
      profdata_url = url
      break
  if not profile_path or not profdata_url:
    raise RuntimeError('No profdata found from %s' % gs_url)

  # The metadata file is uploaded alongside the profdata tarball with the
  # same basename, so derive its URL by swapping the suffix.
  metadata_url = profdata_url.replace('.llvm.profdata.tar.xz',
                                      '.llvm_metadata.json')
  metadata = _get_gs_metadata(metadata_url)
  if not metadata:
    raise RuntimeError('No metadata found from %s' % gs_url)
  return metadata, profile_path
 | 
						|
 | 
						|
 | 
						|
def _fetch_from_latest(arch):
  """Fetch artifacts produced by the latest builder for an arch.

  Args:
    arch: the arch profdata collected from.

  Returns:
    A tuple of (metadata, local profdata location).
  """
  print('\nFETCHING LATEST PROFDATA ON %s...' % arch.upper())
  gs_location = (
      '%schromeos-toolchain-artifacts/llvm-pgo/%s' % (_GS_PREFIX, arch))
  return _find_latest_artifacts(gs_location, arch)
 | 
						|
 | 
						|
 | 
						|
def _fetch_from_buildbucket(arch, bb):
  """Fetch artifacts produced by a specific buildbucket task.

  Args:
    arch: the arch profdata collected from.
    bb: buildbucket id.

  Returns:
    A tuple of (metadata, local profdata location).

  Raises:
    RuntimeError: no bucket matching the buildbucket id was found.
  """
  print('\nFETCHING BUILDBUCKET PROFDATA ON %s...' % arch.upper())
  remote_arch = ('%schromeos-image-archive/%s-pgo-generate-llvm-next-toolchain'
                 % (_GS_PREFIX, arch))
  # List all buckets under {arch}-pgo-generate-llvm-next-toolchain and
  # pick the first whose name contains the buildbucket id.
  buckets = subprocess.check_output(['gsutil', 'ls', remote_arch],
                                    encoding='utf-8').strip().split('\n')
  matched = next((bucket for bucket in buckets if bb in bucket), None)
  if matched is None:
    raise RuntimeError(
        'No matched results found in %s with bb: %s' % (arch, bb))
  return _find_latest_artifacts(matched, arch)
 | 
						|
 | 
						|
 | 
						|
def _merge_profdata(profdata_list, output_name):
  """Merge per-arch profdata files into one with llvm-profdata.

  Args:
    profdata_list: list of profdata location of each arch.
    output_name: name of merged profdata.
  """
  merge_cmd = [_LLVM_PROFDATA, 'merge', '-output', output_name]
  merge_cmd.extend(profdata_list)
  print('\nMerging PGO profiles.\nCMD: %s' % merge_cmd)
  subprocess.check_call(merge_cmd)
 | 
						|
 | 
						|
 | 
						|
def _tar_and_upload_profdata(profdata, name_suffix):
  """Create a tarball of merged profdata and upload to certain gs location.

  Args:
    profdata: location of merged profdata.
    name_suffix: usually the LLVM head_sha.

  Raises:
    ValueError: the destination already exists in gs, so nothing was
      uploaded (`-n` makes gsutil skip rather than overwrite).
  """
  tarball = 'llvm-profdata-%s.tar.xz' % name_suffix
  print('Making profdata tarball: %s' % tarball)
  tar_cmd = ['tar', '--sparse', '-I', 'xz', '-cf', tarball, profdata]
  subprocess.check_call(tar_cmd)

  upload_location = '%schromeos-localmirror/distfiles/%s' % (_GS_PREFIX,
                                                             tarball)

  # TODO: it's better to create a subdir: distfiles/llvm_pgo_profile, but
  # now llvm could only recognize distfiles.
  upload_cmd = [
      'gsutil',
      '-m',
      'cp',
      '-n',
      '-a',
      'public-read',
      tarball,
      upload_location,
  ]
  print('\nUploading tarball to gs.\nCMD: %s\n' % upload_cmd)

  # gsutil prints all status to stderr, oddly enough.
  upload_log = subprocess.check_output(
      upload_cmd, stderr=subprocess.STDOUT, encoding='utf-8')

  # gsutil exits successfully even if it uploaded nothing; a skipped copy
  # only shows up in its log output, so scan for it explicitly.
  if 'Skipping existing item' in upload_log:
    raise ValueError('Profile upload failed: would overwrite an existing '
                     'profile at %s' % upload_location)
 | 
						|
 | 
						|
 | 
						|
def main():
  """Parse arguments, fetch and merge profdata, then upload the result.

  Work happens inside a temporary directory; on success it is removed, on
  failure it is kept and its path printed so artifacts can be inspected.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      '-a',
      '--all_latest_profiles',
      action='store_true',
      help='Merge and upload profiles from the latest builders.')
  parser.add_argument(
      '-l',
      '--latest',
      default=[],
      action='append',
      help='User can specify the profdata from which builder with specific '
      'architecture to download. By default, we merge profdata from arm, '
      'arm64, amd64.')
  parser.add_argument(
      '-b',
      '--buildbucket',
      default=[],
      action='append',
      help='Extra pgo-generate-llvm-next-toolchain buildbucket results to be '
      'used. Format should be: {arch}/{bb_id}.')
  parser.add_argument(
      '-o',
      '--output',
      default='llvm.profdata',
      help='Where to put merged PGO profile. The default is to not save it '
      'anywhere.')
  parser.add_argument(
      '--llvm_hash',
      help='The LLVM hash to select for the profiles. Generally autodetected.')
  args = parser.parse_args()

  if not args.all_latest_profiles and not (args.latest or args.buildbucket):
    parser.error('Please specify whether to use latest profiles or '
                 'profiles from buildbucket')

  if args.all_latest_profiles and (args.latest or args.buildbucket):
    parser.error('--all_latest_profiles cannot be specified together '
                 'with --latest or --buildbucket')

  if args.all_latest_profiles:
    latest = ['arm', 'arm64', 'amd64']
  else:
    latest = args.latest

  # Collect (arch, buildbucket_id) pairs and track every requested arch so
  # duplicates across --latest/--buildbucket can be rejected.
  all_arch_list = latest.copy()
  arch_bb_list = []
  for arch_bb in args.buildbucket:
    arch, bb = arch_bb.split('/')
    arch_bb_list.append((arch, bb))
    all_arch_list.append(arch)

  if len(set(all_arch_list)) != len(all_arch_list):
    parser.error('Each arch can be only passed once.')

  # shutil.which replaces distutils.spawn.find_executable; distutils is
  # deprecated (PEP 632) and removed in Python 3.12.
  if not shutil.which(_LLVM_PROFDATA):
    sys.exit(_LLVM_PROFDATA + ' not found; are you in the chroot?')

  initial_dir = os.getcwd()
  temp_dir = tempfile.mkdtemp(prefix='merge_pgo')
  success = False
  try:
    os.chdir(temp_dir)
    profdata_list = []
    heads = set()

    def append_artifacts(fetched_tuple):
      # Record a fetched (metadata, profdata path) pair, rejecting profiles
      # that are implausibly small (likely a broken build/upload).
      llvm_metadata, profdata_loc = fetched_tuple
      if os.path.getsize(profdata_loc) < 512 * 1024:
        raise RuntimeError('The PGO profile in local path %s is suspiciously '
                           'small. Something might have gone '
                           'wrong.' % profdata_loc)
      heads.add(llvm_metadata.head_sha)
      profdata_list.append(profdata_loc)

    for arch in latest:
      append_artifacts(_fetch_from_latest(arch))

    for arch, bb in arch_bb_list:
      append_artifacts(_fetch_from_buildbucket(arch, bb))

    assert heads, "Didn't fetch anything?"

    def die_with_head_complaint(complaint):
      extra = ' (HEADs found: %s)' % sorted(heads)
      raise RuntimeError(complaint.rstrip() + extra)

    # Autodetect the LLVM hash from artifact metadata unless the user gave
    # one explicitly; either way it must be consistent with what we fetched.
    llvm_hash = args.llvm_hash
    if not llvm_hash:
      if len(heads) != 1:
        die_with_head_complaint(
            '%d LLVM HEADs were found, which is more than one. You probably '
            'want a consistent set of HEADs for a profile. If you know you '
            "don't, please specify --llvm_hash, and note that *all* profiles "
            'will be merged into this final profile, regardless of their '
            'reported HEAD.' % len(heads))
      llvm_hash, = heads

    if llvm_hash not in heads:
      assert llvm_hash == args.llvm_hash
      die_with_head_complaint(
          "HEAD %s wasn't found in any fetched artifacts." % llvm_hash)

    print('\nUsing LLVM hash: %s' % llvm_hash)

    _merge_profdata(profdata_list, args.output)
    print('Merged profdata locates at %s' % os.path.abspath(args.output))
    _tar_and_upload_profdata(args.output, name_suffix=llvm_hash)
    print('\nMerged profdata uploaded successfully.')
    success = True
  finally:
    os.chdir(initial_dir)
    if success:
      print('Clearing temp directory.')
      shutil.rmtree(temp_dir, ignore_errors=True)
    else:
      # Keep the temp dir around for debugging failed runs.
      print('Script fails, temp directory is at: %s' % temp_dir)
 | 
						|
 | 
						|
 | 
						|
# main() has no return statement, so sys.exit(main()) is sys.exit(None),
# i.e. exit status 0 on success; error paths exit earlier inside main().
if __name__ == '__main__':
  sys.exit(main())
 |