165 lines
7.1 KiB
Python
165 lines
7.1 KiB
Python
""" NNAPI systrace parser - aggegation of timing from multiple threads """
|
|
|
|
# TODO:
|
|
# - phase and layer totals
|
|
import math
|
|
|
|
from parser.naming import layers, phases, subphases
|
|
from parser.naming import (PHASE_OVERALL, PHASE_TERMINATION, PHASE_WARMUP,
|
|
PHASE_BENCHMARK, PHASE_EXECUTION, PHASE_INITIALIZATION,
|
|
PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS)
|
|
from parser.naming import LAYER_APPLICATION, LAYER_IPC, LAYER_DRIVER
|
|
LAYER_TOTAL = "LT" # Total across layers
|
|
|
|
def aggregate_times(tracker_map, special_case_lr_pe=True):
|
|
""" Takes the trackers for each thread and produces timing statistics for
|
|
all layers and phases.
|
|
|
|
Returns (times, self_times, has_warmup and has_benchmark, execution_counts),
|
|
where:
|
|
- times and self_times are nested dictionaries of the form
|
|
phase -> layer -> time with the following notes:
|
|
- phase is flattened over all phases, except PHASE_WARMUP and
|
|
PHASE_BENCHMARK, where the structure is phase -> phase -> layer -> time
|
|
- PHASE_WARMUP and PHASE_BENCHMARK only nest execution and its
|
|
subphases
|
|
- PHASE_WARMUP and PHASE_BENCHMARK are not present if the trace does
|
|
not contain them
|
|
- the first level phase contains total over PHASE_WARMUP and
|
|
PHASE_BENCHMARK if present
|
|
- time may be math.nan if the data is not present in the trace
|
|
- in addition to the layer from parser.naming, LAYER_TOTAL holds
|
|
the total time spent in that layer over all phases
|
|
- execution_counts contains a dictionary of the form
|
|
{PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK} -> no of executions
|
|
"""
|
|
all_application_phases = [PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK]
|
|
# Calculate execution counts
|
|
execution_counts = dict()
|
|
for app_phase in all_application_phases:
|
|
execution_count = 0
|
|
for pid in tracker_map:
|
|
execution_count = max(execution_count, tracker_map[pid].get_execution_count(app_phase))
|
|
execution_counts[app_phase] = execution_count
|
|
has_warmup = bool(execution_counts[PHASE_WARMUP])
|
|
has_benchmark = bool(execution_counts[PHASE_BENCHMARK])
|
|
if not (has_warmup and has_benchmark):
|
|
all_application_phases = [PHASE_OVERALL]
|
|
|
|
# Create dicts
|
|
times = {}
|
|
self_times = {}
|
|
if has_warmup and has_benchmark:
|
|
for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
|
|
times[app_phase] = {}
|
|
self_times[app_phase] = {}
|
|
for phase in _phase_and_subphases(PHASE_EXECUTION):
|
|
times[app_phase][phase] = {}
|
|
self_times[app_phase][phase] = {}
|
|
for phase in phases + [PHASE_OVERALL] + subphases[PHASE_EXECUTION]:
|
|
times[phase] = {}
|
|
self_times[phase] = {}
|
|
|
|
# Gather total times from all threads, calculate layer and phase totals
|
|
for layer in layers:
|
|
for phase0 in [PHASE_OVERALL] + phases:
|
|
for phase in _phase_and_subphases(phase0):
|
|
t = 0.0
|
|
tag = layer + "_" + phase
|
|
for app_phase in all_application_phases:
|
|
t0 = 0.0
|
|
if layer == LAYER_DRIVER and phase == PHASE_EXECUTION:
|
|
# Calculate driver execution times from begins and ends
|
|
begins = []
|
|
ends = []
|
|
for pid in tracker_map:
|
|
begins = begins + tracker_map[pid].get_ld_pe_begins(app_phase)
|
|
ends = ends + tracker_map[pid].get_ld_pe_ends(app_phase)
|
|
assert len(begins) == len(ends)
|
|
begins.sort()
|
|
ends.sort()
|
|
for i in range(0, len(begins)):
|
|
t0 += (ends[i] - begins[i])
|
|
else:
|
|
for pid in tracker_map:
|
|
t0 += tracker_map[pid].get_stat(tag, app_phase, special_case_lr_pe)
|
|
if phase0 == PHASE_EXECUTION and (app_phase != PHASE_OVERALL):
|
|
times[app_phase][phase][layer] = zero_to_nan_if_missing(t0, phase, layer)
|
|
t += t0
|
|
times[phase][layer] = zero_to_nan_if_missing(t, phase, layer)
|
|
if not times[PHASE_OVERALL][layer]:
|
|
times[PHASE_OVERALL][layer] = sum(nan_to_zero(times[phase][layer]) for phase in phases)
|
|
for phase0 in [PHASE_OVERALL] + phases:
|
|
for phase in _phase_and_subphases(phase0):
|
|
times[phase][LAYER_TOTAL] = max_ignoring_nans(times[phase].values())
|
|
if phase0 == PHASE_EXECUTION and (has_warmup and has_benchmark):
|
|
for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
|
|
times[app_phase][phase][LAYER_TOTAL] = max_ignoring_nans(times[app_phase][phase].values())
|
|
|
|
# Calculate self-times for each layer
|
|
for phase0 in [PHASE_OVERALL] + phases:
|
|
for phase in _phase_and_subphases(phase0):
|
|
self_times[phase][LAYER_TOTAL] = times[phase][LAYER_TOTAL]
|
|
if phase0 == PHASE_EXECUTION and (has_warmup and has_benchmark):
|
|
for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
|
|
self_times[app_phase][phase][LAYER_TOTAL] = times[app_phase][phase][LAYER_TOTAL]
|
|
t = 0.0
|
|
for layer in reversed(layers):
|
|
if math.isnan(times[phase][layer]):
|
|
self_times[phase][layer] = math.nan
|
|
elif times[phase][layer] == 0.0:
|
|
self_times[phase][layer] = 0.0
|
|
elif (phase == PHASE_OVERALL and
|
|
(layer == LAYER_DRIVER or layer == LAYER_IPC) and
|
|
times[PHASE_EXECUTION][LAYER_DRIVER] == 0.0):
|
|
# Driver was only used for initialization phase, did not support
|
|
# execution of the model
|
|
if layer == LAYER_DRIVER:
|
|
self_times[phase][layer] = times[phase][layer]
|
|
else:
|
|
self_times[phase][layer] = times[phase][layer] - times[phase][LAYER_DRIVER]
|
|
else:
|
|
self_times[phase][layer] = times[phase][layer] - t
|
|
t = times[phase][layer]
|
|
if phase0 == PHASE_EXECUTION and (has_benchmark or has_warmup):
|
|
for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
|
|
t = 0.0
|
|
for layer in reversed(layers):
|
|
if math.isnan(times[app_phase][phase][layer]):
|
|
self_times[app_phase][phase][layer] = math.nan
|
|
elif times[app_phase][phase][layer] == 0.0:
|
|
self_times[app_phase][phase][layer] = 0.0
|
|
else:
|
|
self_times[app_phase][phase][layer] = times[app_phase][phase][layer] - t
|
|
t = times[app_phase][phase][layer]
|
|
|
|
return (times, self_times, has_warmup and has_benchmark, execution_counts)
|
|
|
|
def zero_to_nan_if_missing(f, phase, layer):
|
|
""" Turn zero time to a NaN to indicate missing data, when we think that
|
|
the data is really missing. Data should only be missing from the
|
|
Application layer (applications may not have any tracing) and
|
|
the subphases of Execution in the Driver layer (other phases are
|
|
discernible from the automatic HIDL tracepoints)."""
|
|
if f == 0.0:
|
|
if layer == LAYER_APPLICATION:
|
|
return math.nan
|
|
if layer == LAYER_DRIVER and phase in subphases[PHASE_EXECUTION]:
|
|
return math.nan
|
|
return f
|
|
|
|
def nan_to_zero(f):
|
|
if math.isnan(f):
|
|
return 0.0
|
|
return f
|
|
|
|
def _phase_and_subphases(phase):
|
|
if phase == PHASE_OVERALL:
|
|
return [phase]
|
|
if phase == PHASE_WARMUP or phase == PHASE_BENCHMARK:
|
|
return []
|
|
return [phase] + subphases.get(phase, [])
|
|
|
|
def max_ignoring_nans(xs):
|
|
return max(map(nan_to_zero, xs))
|