// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Google LLC.
 *
 * Based on klockstat from BCC by Jiri Olsa and others
 * 2021-10-26   Barret Rhoden   Created this.
 */
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "klockstat.h"
#include "bits.bpf.h"

const volatile pid_t targ_tgid = 0;
const volatile pid_t targ_pid = 0;
struct mutex *const volatile targ_lock = NULL;

struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, MAX_ENTRIES);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
} stack_map SEC(".maps");

/*
 * Uniquely identifies a task grabbing a particular lock; a task can only hold
 * the same lock once (non-recursive mutexes).
 */
struct task_lock {
	u64 task_id;
	u64 lock_ptr;
};

struct lockholder_info {
	s32 stack_id;
	u64 task_id;
	u64 try_at;
	u64 acq_at;
	u64 rel_at;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct task_lock);
	__type(value, struct lockholder_info);
} lockholder_map SEC(".maps");

/*
 * Keyed by stack_id.
 *
 * Multiple call sites may have the same underlying lock, but we only know the
 * stats for a particular stack frame.  Multiple tasks may have the same
 * stackframe.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, s32);
	__type(value, struct lock_stat);
} stat_map SEC(".maps");

static bool tracing_task(u64 task_id)
{
	u32 tgid = task_id >> 32;
	u32 pid = task_id;

	if (targ_tgid && targ_tgid != tgid)
		return false;
	if (targ_pid && targ_pid != pid)
		return false;
	return true;
}

static void lock_contended(void *ctx, struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info li[1] = {0};
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;

	li->task_id = task_id;
	/*
	 * Skip 4 frames, e.g.:
	 *	__this_module+0x34ef
	 *	__this_module+0x34ef
	 *	__this_module+0x8c44
	 *	mutex_lock+0x5
	 *
	 * Note: if you make major changes to this bpf program, double check
	 * that you aren't skipping too many frames.
	 */
	li->stack_id = bpf_get_stackid(ctx, &stack_map,
				       4 | BPF_F_FAST_STACK_CMP);
	/* Legit failures include EEXIST */
	if (li->stack_id < 0)
		return;
	li->try_at = bpf_ktime_get_ns();

	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	bpf_map_update_elem(&lockholder_map, &tl, li, BPF_ANY);
}

static void lock_aborted(struct mutex *lock)
{
	u64 task_id;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;
	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	bpf_map_delete_elem(&lockholder_map, &tl);
}

static void lock_acquired(struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info *li;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;

	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	li = bpf_map_lookup_elem(&lockholder_map, &tl);
	if (!li)
		return;

	li->acq_at = bpf_ktime_get_ns();
}

static void account(struct lockholder_info *li)
{
	struct lock_stat *ls;
	u64 delta;

	/*
	 * Multiple threads may have the same stack_id.  Even though we are
	 * holding the lock, dynamically allocated mutexes can have the same
	 * callgraph but represent different locks.  They will be accounted as
	 * the same lock, which is what we want, but we need to use atomics to
	 * avoid corruption, especially for the total_time variables.
	 */
	ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
	if (!ls) {
		struct lock_stat fresh = {0};

		bpf_map_update_elem(&stat_map, &li->stack_id, &fresh, BPF_ANY);
		ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
		if (!ls)
			return;
	}

	delta = li->acq_at - li->try_at;
	__sync_fetch_and_add(&ls->acq_count, 1);
	__sync_fetch_and_add(&ls->acq_total_time, delta);
	if (delta > READ_ONCE(ls->acq_max_time)) {
		WRITE_ONCE(ls->acq_max_time, delta);
		WRITE_ONCE(ls->acq_max_id, li->task_id);
		/*
		 * Potentially racy, if multiple threads think they are the
		 * max, so you may get a clobbered write.
		 */
		bpf_get_current_comm(ls->acq_max_comm, TASK_COMM_LEN);
	}

	delta = li->rel_at - li->acq_at;
	__sync_fetch_and_add(&ls->hld_count, 1);
	__sync_fetch_and_add(&ls->hld_total_time, delta);
	if (delta > READ_ONCE(ls->hld_max_time)) {
		WRITE_ONCE(ls->hld_max_time, delta);
		WRITE_ONCE(ls->hld_max_id, li->task_id);
		bpf_get_current_comm(ls->hld_max_comm, TASK_COMM_LEN);
	}
}

static void lock_released(struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info *li;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;
	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	li = bpf_map_lookup_elem(&lockholder_map, &tl);
	if (!li)
		return;

	li->rel_at = bpf_ktime_get_ns();
	account(li);

	bpf_map_delete_elem(&lockholder_map, &tl);
}

SEC("fentry/mutex_lock")
int BPF_PROG(mutex_lock, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

SEC("fexit/mutex_lock")
int BPF_PROG(mutex_lock_exit, struct mutex *lock, long ret)
{
	lock_acquired(lock);
	return 0;
}

SEC("fexit/mutex_trylock")
int BPF_PROG(mutex_trylock_exit, struct mutex *lock, long ret)
{
	if (ret) {
		lock_contended(ctx, lock);
		lock_acquired(lock);
	}
	return 0;
}

SEC("fentry/mutex_lock_interruptible")
int BPF_PROG(mutex_lock_interruptible, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

SEC("fexit/mutex_lock_interruptible")
int BPF_PROG(mutex_lock_interruptible_exit, struct mutex *lock, long ret)
{
	if (ret)
		lock_aborted(lock);
	else
		lock_acquired(lock);
	return 0;
}

SEC("fentry/mutex_lock_killable")
int BPF_PROG(mutex_lock_killable, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

SEC("fexit/mutex_lock_killable")
int BPF_PROG(mutex_lock_killable_exit, struct mutex *lock, long ret)
{
	if (ret)
		lock_aborted(lock);
	else
		lock_acquired(lock);
	return 0;
}

SEC("fentry/mutex_unlock")
int BPF_PROG(mutex_unlock, struct mutex *lock)
{
	lock_released(lock);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
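
/*
 * Illustrative userspace usage: a hedged sketch, kept inside a comment so it
 * does not become part of this BPF object, and not necessarily how the real
 * klockstat front end is written.  It assumes a skeleton header
 * "klockstat.skel.h" generated from this file with `bpftool gen skeleton`,
 * so the klockstat_bpf__* names and obj->rodata/obj->maps accessors follow
 * the usual libbpf skeleton convention.  It sets the rodata filters defined
 * at the top of this file, lets the fentry/fexit programs run, then walks
 * stat_map (keyed by stack_id) and prints a few struct lock_stat fields
 * updated in account().
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <bpf/bpf.h>
 *	#include <bpf/libbpf.h>
 *	#include "klockstat.h"
 *	#include "klockstat.skel.h"
 *
 *	int main(void)
 *	{
 *		struct klockstat_bpf *obj;
 *		struct lock_stat ls;
 *		int fd, err, key = -1, next_key;
 *
 *		obj = klockstat_bpf__open();
 *		if (!obj)
 *			return 1;
 *		obj->rodata->targ_tgid = 0;	// 0 means trace all tasks
 *		err = klockstat_bpf__load(obj);
 *		if (!err)
 *			err = klockstat_bpf__attach(obj);
 *		if (err)
 *			goto cleanup;
 *
 *		sleep(5);	// collect data for a while
 *
 *		// key = -1 is never a valid stack_id, so iteration starts
 *		// from the first entry.
 *		fd = bpf_map__fd(obj->maps.stat_map);
 *		while (!bpf_map_get_next_key(fd, &key, &next_key)) {
 *			if (!bpf_map_lookup_elem(fd, &next_key, &ls))
 *				printf("%s: acquired %llu times, held %llu ns total\n",
 *				       ls.acq_max_comm,
 *				       (unsigned long long)ls.acq_count,
 *				       (unsigned long long)ls.hld_total_time);
 *			key = next_key;
 *		}
 *	cleanup:
 *		klockstat_bpf__destroy(obj);
 *		return err != 0;
 *	}
 */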