android13/packages/services/Car/tools/cpu_perf/perfetto_cpu_analysis.py

312 lines
11 KiB
Python

#!/usr/bin/python3
# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Tool to analyze CPU performance from a perfetto trace.
This tool assumes that core clocks are fixed.
It will not give accurate results if clock frequencies change.
Requires the perfetto package: $ pip install perfetto
"""
import argparse
import sys
from perfetto.trace_processor import TraceProcessor
from config import get_script_dir as get_script_dir
from config import parse_config as parse_config
from config import add_line_with_indentation
# Per-core scheduling totals read from the `sched` table: one row per cpu,
# ordered by cpu, holding the summed `dur` of rows whose utid is 0 (reported
# as idleTime) and the summed `dur` of every other row (reported as
# activeTime).
# NOTE(review): presumably utid 0 is the idle/swapper thread and `dur` is in
# nanoseconds -- confirm against the trace processor table schema.
QUERY_SCHED_CORE_SUM = """SELECT
cpu AS core,
SUM (CASE
WHEN utid = 0 THEN 0
ELSE dur
END) AS activeTime,
SUM (CASE
WHEN utid = 0 THEN dur
ELSE 0
END) AS idleTime
FROM sched
GROUP BY cpu
ORDER BY cpu"""
class CoreLoad:
    """Cycle count attributed to one CPU core.

    Attributes:
        coreId: Identifier of the core the cycles were spent on.
        totalCycles: Number of cycles recorded for that core.
    """

    def __init__(self, coreId, totalCycles):
        """Store the core identifier together with its cycle count."""
        self.coreId, self.totalCycles = coreId, totalCycles
class CPUExecutionInfo:
    """Named entity (process or thread) with its cycle usage per core.

    Attributes:
        name: Display name of the entity.
        perCoreLoads: Mapping of core id to the load object recorded for
            that core (an object exposing `coreId` and `totalCycles`).
    """

    def __init__(self, name):
        """Create an entry called `name` with no recorded loads."""
        self.name = name
        self.perCoreLoads = {}  # key: core, value: CoreLoad

    def addCoreLoad(self, load):
        """Record `load` under its core id, replacing any earlier entry."""
        self.perCoreLoads[load.coreId] = load

    def getCoreCycles(self, coreId):
        """Return the cycles recorded for `coreId`, or 0 if none exist."""
        load = self.perCoreLoads.get(coreId)
        return 0 if load is None else load.totalCycles

    def getTotalCycles(self):
        """Return the cycles summed over every recorded core (0 if empty)."""
        # Uses the builtin sum() instead of a local accumulator that
        # shadowed it.
        return sum(load.totalCycles for load in self.perCoreLoads.values())
class ThreadInfo(CPUExecutionInfo):
    """Per-thread CPU usage that can print itself as an indented report."""

    def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile):
        """Print this thread's share of CPU cycles, overall and per core.

        Args:
            totalCpuCycles: Cycle capacity of all cores combined; divisor
                for the "total" percentage.
            perCoreTotalCycles: Mapping of core id to that core's cycle
                capacity; divisor for the per-core percentages.
            loadPercentile: Overall system load in percent; the "active"
                figure rescales the total percentage by it.
        """
        indent = 2
        # Lines are collected through add_line_with_indentation (imported
        # from config; presumably it appends the indented text to the list)
        # and emitted with a single print at the end.
        lines = []
        shareOfTotal = float(self.getTotalCycles()) / totalCpuCycles * 100.0
        shareOfActive = shareOfTotal / loadPercentile * 100.0
        header = "{}: total: {:.3f}% active: {:.3f}%".format(
            self.name, shareOfTotal, shareOfActive)
        add_line_with_indentation(lines, header, indent)
        add_line_with_indentation(lines, 50 * "-", indent)
        for coreId in sorted(self.perCoreLoads):
            cycles = self.perCoreLoads[coreId].totalCycles
            corePercent = float(cycles) / perCoreTotalCycles[coreId] * 100.0
            label = "Core {}".format(coreId)
            value = "{:.3f}%".format(corePercent)
            add_line_with_indentation(
                lines, "{:<10} {:<15}".format(label, value), indent)
        print("".join(lines))
class ProcessInfo(CPUExecutionInfo):
    """Per-process CPU usage together with the usage of its threads.

    Attributes:
        threads: List of ThreadInfo objects belonging to this process.
    """

    def __init__(self, name):
        """Create a process entry called `name` with no threads yet."""
        super().__init__(name)
        self.threads = []  # ThreadInfo

    def get_filtered_threads(self, threadNames):
        """Return the threads whose name contains any of `threadNames`.

        Args:
            threadNames: Collection of substrings to look for.

        Returns:
            A new list, in the order of `self.threads`. An empty
            `threadNames` matches nothing (it used to raise ValueError
            from max() on an empty sequence).
        """
        return [
            t for t in self.threads
            if any(filterName in t.name for filterName in threadNames)
        ]

    def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile, showThreads=False):
        """Print this process's share of CPU cycles, overall and per core.

        Args:
            totalCpuCycles: Cycle capacity of all cores combined; divisor
                for the "total" percentage.
            perCoreTotalCycles: Mapping of core id to that core's cycle
                capacity; divisor for the per-core percentages.
            loadPercentile: Overall system load in percent; the "active"
                figure rescales the total percentage by it.
            showThreads: When true, also print every thread, busiest
                first. This sorts `self.threads` in place.
        """
        msgs = []
        totalCpuLoad = float(self.getTotalCycles()) / totalCpuCycles * 100.0
        activeCpuLoad = totalCpuLoad / loadPercentile * 100.0
        msgs.append("{}: total: {:.3f}% active: {:.3f}%"
                    .format(self.name, totalCpuLoad, activeCpuLoad))
        msgs.append("\n" + 50 * "-")
        for c in sorted(self.perCoreLoads):
            load = self.perCoreLoads[c]
            coreLoad = float(load.totalCycles) / perCoreTotalCycles[c] * 100.0
            msgs.append("\n{:<10} {:<15}".format("Core {}".format(c), "{:.3f}%".format(coreLoad)))
        print(''.join(msgs))
        if showThreads:
            self.threads.sort(reverse=True, key=lambda p: p.getTotalCycles())
            for t in self.threads:
                t.print(totalCpuCycles, perCoreTotalCycles, loadPercentile)
        # Blank separator after each process report.
        # NOTE(review): assumed to be unconditional rather than part of the
        # showThreads branch -- confirm against the upstream layout.
        print('\n')
class TotalCoreLoad:
    """Active/idle time of one core and the resulting load percentage.

    Attributes:
        coreId: Identifier of the core.
        activeTime: Time the core spent running something.
        idleTime: Time the core spent idle (same unit as activeTime).
        loadPercentile: activeTime as a percentage of activeTime + idleTime.
    """

    def __init__(self, coreId, activeTime, idleTime):
        """Store the measurements and derive the load percentage.

        A core with no recorded time at all (activeTime + idleTime == 0) is
        reported as 0% loaded instead of raising ZeroDivisionError.
        """
        self.coreId = coreId
        self.activeTime = activeTime
        self.idleTime = idleTime
        totalTime = idleTime + activeTime
        if totalTime == 0:
            self.loadPercentile = 0.0
        else:
            self.loadPercentile = float(activeTime) / totalTime * 100.0
class SystemLoad:
    """System-wide CPU load: per-core time measurements plus process usage.

    Attributes:
        totalLoads: Per-core load objects (TotalCoreLoad), filled by
            addTimeMeasurements in ascending core-id order.
        totalLoad: Initialised to 0.0; not updated anywhere in this class.
        processes: ProcessInfo objects to report on.
    """

    def __init__(self):
        """Create an empty container."""
        self.totalLoads = []  # TotalCoreLoad
        self.totalLoad = 0.0
        self.processes = []  # ProcessInfo

    def addTimeMeasurements(self, coreData, allCores):
        """Append one TotalCoreLoad per core to `self.totalLoads`.

        Args:
            coreData: Iterable of rows exposing `core`, `activeTime` and
                `idleTime`.
            allCores: Iterable of every core id that should be reported.
                Cores missing from `coreData` are added as fully idle for
                the longest total time seen on any measured core.
        """
        coreLoads = {}  # k: core, v: TotalCoreLoad
        maxTotalTime = 0
        for entry in coreData:
            activeTime = entry.activeTime
            idleTime = entry.idleTime
            maxTotalTime = max(maxTotalTime, activeTime + idleTime)
            coreLoads[entry.core] = TotalCoreLoad(entry.core, activeTime, idleTime)
        for c in allCores:
            if c not in coreLoads:
                # This core was not used at all, so add it with idle time only.
                coreLoads[c] = TotalCoreLoad(c, 0, maxTotalTime)
        self.totalLoads.extend(coreLoads[c] for c in sorted(coreLoads))

    def get_filtered_processes(self, process_names):
        """Group processes by the filter substring their name contains.

        Args:
            process_names: Iterable of substrings to look for.

        Returns:
            Dict mapping each filter that matched at least one process to
            the list of matching processes (in `self.processes` order).
            Filters without a match are omitted.
        """
        processPerName = {}
        for name in process_names:
            matches = [p for p in self.processes if name in p.name]
            if matches:
                processPerName[name] = matches
        return processPerName

    def print(self, cpuConfig, numTopN, filterProcesses, filterThreads):
        """Print the per-core load table and the process/thread reports.

        Args:
            cpuConfig: Object whose `coreMaxFreqKHz` maps core id to the
                core's maximum frequency in kHz.
            numTopN: Number of busiest processes shown under
                "Top processes".
            filterProcesses: Optional iterable of process-name substrings;
                matching processes are printed with their threads.
            filterThreads: Optional dict of process-name substring to a
                list of thread-name substrings; matching threads are
                printed per matching process.

        Side effect: `self.processes` is sorted in place, busiest first.
        """
        print("\nTime based CPU load\n" + 30 * "=")
        loadXClkSum = 0.0
        perCoreCpuCycles = {}
        totalCpuCycles = 0
        maxCpuGHz = 0.0
        print("{:<10} {:<15} {:<15} {:<15}\n{}".
              format("CPU", "CPU Load %", "CPU Usage", "Max CPU Freq.", 60 * "-"))
        for coreLoad in self.totalLoads:
            coreMaxFreqGHz = float(cpuConfig.coreMaxFreqKHz[coreLoad.coreId]) / 1e6
            coreIdStr = "Core {}".format(coreLoad.coreId)
            loadPercentileStr = "{:.3f}%".format(coreLoad.loadPercentile)
            loadUsageStr = "{:.3f} GHz".format(coreLoad.loadPercentile * coreMaxFreqGHz / 100)
            coreMaxFreqStr = "{:.3f} GHz".format(coreMaxFreqGHz)
            print("{:<10} {:<15} {:<15} {:<15}".
                  format(coreIdStr, loadPercentileStr, loadUsageStr, coreMaxFreqStr))
            maxCpuGHz += coreMaxFreqGHz
            loadXClkSum += coreLoad.loadPercentile * coreMaxFreqGHz
            # Cycle capacity of the core: observed time multiplied by the
            # maximum clock (the tool assumes fixed clocks).
            perCoreCpuCycles[coreLoad.coreId] = \
                (coreLoad.activeTime + coreLoad.idleTime) * coreMaxFreqGHz
            totalCpuCycles += perCoreCpuCycles[coreLoad.coreId]
        # Frequency-weighted average load; 0.0 when no core was measured
        # (previously a ZeroDivisionError).
        loadPercentile = loadXClkSum / maxCpuGHz if maxCpuGHz else 0.0
        print("\nTotal Load: {:.3f}%, {:.2f} GHz with system max {:.2f} GHz".
              format(loadPercentile, loadPercentile * maxCpuGHz / 100.0, maxCpuGHz))
        self.processes.sort(reverse=True, key=lambda p: p.getTotalCycles())
        if filterThreads is not None:
            print("\nFiltered threads\n" + 30 * "=")
            processPerName = self.get_filtered_processes(filterThreads.keys())
            if not processPerName:
                print("No process found matching filters.")
            for name, matches in processPerName.items():
                for p in matches:
                    threads = p.get_filtered_threads(filterThreads[name])
                    print("\n{}\n".format(p.name) + 30 * "-")
                    for t in threads:
                        t.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)
        if filterProcesses is not None:
            print("\nFiltered processes\n" + 30 * "=")
            processPerName = self.get_filtered_processes(filterProcesses)
            if not processPerName:
                print("No process found matching filters.")
            # Flatten the per-filter lists, keeping each process only once
            # even when it matches several filters (keyed by identity so
            # no hashing or equality of the process objects is required).
            uniqueProcesses = {}
            for matches in processPerName.values():
                for p in matches:
                    uniqueProcesses.setdefault(id(p), p)
            processes = sorted(uniqueProcesses.values(), reverse=True,
                               key=lambda p: p.getTotalCycles())
            for p in processes:
                p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile, showThreads=True)
        print("\nTop processes\n" + 30 * "=")
        for p in self.processes[:numTopN]:
            p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)
def init_arguments():
    """Define the command-line interface and parse the process arguments.

    Returns:
        The argparse namespace with `config_file` (opened for reading),
        `cpusettings`, `number_of_top_processes`, `process_names`,
        `thread_names` and `trace_file` (a one-element list).
    """
    defaultConfigPath = get_script_dir() + '/pixel6.config'
    argParser = argparse.ArgumentParser(description='Analyze CPU perf.')

    # CPU description and which of its settings to use.
    argParser.add_argument(
        '-f', '--configfile',
        dest='config_file',
        type=argparse.FileType('r'),
        default=defaultConfigPath,
        help='CPU config file')
    argParser.add_argument(
        '-c', '--cpusettings',
        dest='cpusettings',
        action='store',
        default='default',
        help='CPU Settings to apply')

    # Report size and optional filters (both filters may be repeated).
    argParser.add_argument(
        '-n', '--number_of_top_processes',
        dest='number_of_top_processes',
        action='store',
        type=int,
        default=5,
        help='Number of processes to show in performance report')
    argParser.add_argument(
        '-p', '--process-name',
        dest='process_names',
        action='append',
        help='Name of process to filter')
    argParser.add_argument(
        '-t', '--thread-name',
        dest='thread_names',
        action='append',
        help='Name of thread to filter. Format: <process-name>:<thread-name>')

    # The trace to analyze.
    argParser.add_argument(
        'trace_file',
        action='store',
        nargs=1,
        help='Perfetto trace file to analyze')

    return argParser.parse_args()
def get_core_load(coreData, cpuConfig):
    """Convert one per-core metrics entry into a CoreLoad.

    Args:
        coreData: Entry exposing `id` and `metrics`; `metrics` provides
            `runtime_ns`, `HasField()` and optionally `avg_freq_khz`.
        cpuConfig: Object whose `coreMaxFreqKHz` maps core id to the
            core's maximum frequency in kHz.

    Returns:
        A CoreLoad for the core with its cycle count.
    """
    metrics = coreData.metrics
    maxFreqKHz = cpuConfig.coreMaxFreqKHz[coreData.id]
    # Use the measured average frequency when the trace has one, otherwise
    # fall back to the configured maximum.
    freqKHz = metrics.avg_freq_khz if metrics.HasField('avg_freq_khz') else maxFreqKHz
    # kHz * ns / 1e6 gives Hz * s, i.e. a cycle count.
    return CoreLoad(coreData.id, freqKHz * metrics.runtime_ns / 1000000)
def run_analysis(
    traceFile,
    cpuConfig,
    cpuSettings,
    numTopN=5,
    filterProcesses=None,
    filterThreads=None
):
    """Load a perfetto trace, gather CPU usage and print the report.

    Args:
        traceFile: Path of the perfetto trace to analyze.
        cpuConfig: Parsed CPU configuration; its `coreMaxFreqKHz` is used
            to convert run time into cycles.
        cpuSettings: Selected CPU settings; `onlines` lists the cores to
            report.
        numTopN: Number of busiest processes to list.
        filterProcesses: Optional list of process-name substrings.
        filterThreads: Optional dict of process-name substring to a list
            of thread-name substrings.
    """
    tp = TraceProcessor(file_path=traceFile)
    systemLoad = SystemLoad()
    try:
        # Idle and active time for each core.
        core_times = tp.query(QUERY_SCHED_CORE_SUM)
        systemLoad.addTimeMeasurements(core_times, cpuSettings.onlines)

        # Per-process and per-thread cycle usage from the android_cpu metric.
        cpu_metrics = tp.metric(['android_cpu']).android_cpu
        for p in cpu_metrics.process_info:
            info = ProcessInfo(p.name)
            for c in p.core:
                info.addCoreLoad(get_core_load(c, cpuConfig))
            for t in p.threads:
                thread_info = ThreadInfo(t.name)
                for tc in t.core:
                    thread_info.addCoreLoad(get_core_load(tc, cpuConfig))
                info.threads.append(thread_info)
            systemLoad.processes.append(info)
    finally:
        # Release the trace processor even if the query or metric fails;
        # it was previously never closed.
        tp.close()
    systemLoad.print(cpuConfig, numTopN, filterProcesses, filterThreads)
def main():
    """Entry point: parse arguments and config, then run the analysis."""
    args = init_arguments()

    # Resolve the requested CPU settings from the parsed config.
    cpuConfig = parse_config(args.config_file)
    cpuSettings = cpuConfig.configs.get(args.cpusettings)
    if cpuSettings is None:
        print("Cannot find cpusettings {}".format(args.cpusettings))
        return

    # Turn "<process>:<thread>" arguments into {process: [thread, ...]}.
    # Malformed entries are skipped; if nothing valid remains, no thread
    # filter is applied.
    threadsPerProcess = None
    if args.thread_names is not None:
        collected = {}
        for spec in args.thread_names:
            parts = spec.split(':')
            if len(parts) != 2:
                print(" Skipping {}: invalid format".format(spec))
                continue
            processName, threadName = parts
            collected.setdefault(processName, []).append(threadName)
        threadsPerProcess = collected or None

    run_analysis(
        args.trace_file[0],
        cpuConfig,
        cpuSettings,
        args.number_of_top_processes,
        args.process_names,
        threadsPerProcess,
    )


# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()