mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 16:53:58 -05:00
perf tools fixes for 5.11:
- Fix 'CPU too large' error in Intel PT.
- Correct event attribute sizes in 'perf inject'.
- Sync build_bug.h and kvm.h kernel copies.
- Fix bpf.h header include directive in 5sec.c 'perf trace' bpf example.
- libbpf tests fixes.
- Fix shadow stat 'perf test' for non-bash shells.
- Take cgroups into account for shadow stats in 'perf stat'.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Test results:
The first ones are container-based builds of tools/perf with and without libelf
support. Where clang is available, it is also used to build perf with/without
libelf, and perf is built with LIBCLANGLLVM=1 (built-in clang) using both gcc
and clang when clang and its devel libraries are installed.
The objtool and samples/bpf/ builds are disabled now that I'm switching from
using the sources in a local volume to fetching them from an HTTP server to
build them inside the container, to make it easier to build in a container cluster.
Those will come back later.
Several are cross builds, the ones with -x-ARCH and the android one, and those
may not have all the features built, due to lack of multi-arch devel packages,
available and being used so far on just a few, like
debian:experimental-x-{arm64,mipsel}.
The 'perf test' one will perform a variety of tests exercising
tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands
with a variety of command line event specifications to then intercept the
sys_perf_event syscall to check that the perf_event_attr fields are set up as
expected, among a variety of other unit tests.
Then there are the 'make -C tools/perf build-test' ones, which build tools/perf/
with a variety of feature sets, exercising the build with an incomplete set of
features as well as with a complete one. It is planned to have it run on each
of the containers mentioned above, using some container orchestration
infrastructure. Get in contact if interested in helping to get this in place.
$ grep "model name" -m1 /proc/cpuinfo
model name: AMD Ryzen 9 3900X 12-Core Processor
# export PERF_TARBALL=http://192.168.86.5/perf/perf-5.11.0-rc3.tar.xz
# dm
1 66.93 alpine:3.4 : Ok gcc (Alpine 5.3.0) 5.3.0, clang version 3.8.0 (tags/RELEASE_380/final)
2 68.65 alpine:3.5 : Ok gcc (Alpine 6.2.1) 6.2.1 20160822, clang version 3.8.1 (tags/RELEASE_381/final)
3 73.00 alpine:3.6 : Ok gcc (Alpine 6.3.0) 6.3.0, clang version 4.0.0 (tags/RELEASE_400/final)
4 79.04 alpine:3.7 : Ok gcc (Alpine 6.4.0) 6.4.0, Alpine clang version 5.0.0 (tags/RELEASE_500/final) (based on LLVM 5.0.0)
5 79.71 alpine:3.8 : Ok gcc (Alpine 6.4.0) 6.4.0, Alpine clang version 5.0.1 (tags/RELEASE_501/final) (based on LLVM 5.0.1)
6 82.51 alpine:3.9 : Ok gcc (Alpine 8.3.0) 8.3.0, Alpine clang version 5.0.1 (tags/RELEASE_502/final) (based on LLVM 5.0.1)
7 103.45 alpine:3.10 : Ok gcc (Alpine 8.3.0) 8.3.0, Alpine clang version 8.0.0 (tags/RELEASE_800/final) (based on LLVM 8.0.0)
8 113.86 alpine:3.11 : Ok gcc (Alpine 9.3.0) 9.3.0, Alpine clang version 9.0.0 (https://git.alpinelinux.org/aports f7f0d2c2b8bcd6a5843401a9a702029556492689) (based on LLVM 9.0.0)
9 109.31 alpine:3.12 : Ok gcc (Alpine 9.3.0) 9.3.0, Alpine clang version 10.0.0 (https://gitlab.alpinelinux.org/alpine/aports.git 7445adce501f8473efdb93b17b5eaf2f1445ed4c)
10 113.90 alpine:edge : Ok gcc (Alpine 10.2.0) 10.2.0, Alpine clang version 10.0.1
11 66.76 alt:p8 : Ok x86_64-alt-linux-gcc (GCC) 5.3.1 20151207 (ALT p8 5.3.1-alt3.M80P.1), clang version 3.8.0 (tags/RELEASE_380/final)
12 83.71 alt:p9 : Ok x86_64-alt-linux-gcc (GCC) 8.4.1 20200305 (ALT p9 8.4.1-alt0.p9.1), clang version 10.0.0
13 80.70 alt:sisyphus : Ok x86_64-alt-linux-gcc (GCC) 9.3.1 20200518 (ALT Sisyphus 9.3.1-alt1), clang version 10.0.1
14 62.75 amazonlinux:1 : Ok gcc (GCC) 7.2.1 20170915 (Red Hat 7.2.1-2), clang version 3.6.2 (tags/RELEASE_362/final)
15 97.65 amazonlinux:2 : Ok gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-12), clang version 7.0.1 (Amazon Linux 2 7.0.1-1.amzn2.0.2)
16 21.18 android-ndk:r12b-arm : Ok arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
17 21.07 android-ndk:r15c-arm : Ok arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
18 25.83 centos:6 : Ok gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)
19 30.65 centos:7 : Ok gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
20 93.44 centos:8 : Ok gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5), clang version 10.0.1 (Red Hat 10.0.1-1.module_el8.3.0+467+cb298d5b)
21 60.64 clearlinux:latest : Ok gcc (Clear Linux OS for Intel Architecture) 10.2.1 20201217 releases/gcc-10.2.0-643-g7cbb07d2fc, clang version 10.0.1
22 74.57 debian:8 : Ok gcc (Debian 4.9.2-10+deb8u2) 4.9.2, Debian clang version 3.5.0-10 (tags/RELEASE_350/final) (based on LLVM 3.5.0)
23 75.40 debian:9 : Ok gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516, clang version 3.8.1-24 (tags/RELEASE_381/final)
24 72.75 debian:10 : Ok gcc (Debian 8.3.0-6) 8.3.0, clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)
25 72.36 debian:experimental : Ok gcc (Debian 10.2.1-6) 10.2.1 20210110, Debian clang version 11.0.1-2
26 32.35 debian:experimental-x-arm64 : Ok aarch64-linux-gnu-gcc (Debian 10.2.1-6) 10.2.1 20210110
27 28.65 debian:experimental-x-mips64 : Ok mips64-linux-gnuabi64-gcc (Debian 10.2.1-3) 10.2.1 20201224
28 13.79 debian:experimental-x-mipsel : FAIL mipsel-linux-gnu-gcc (Debian 10.2.1-3) 10.2.1 20201224
CC /tmp/build/perf/util/map.o
util/map.c: In function 'map__new':
util/map.c:109:5: error: '%s' directive output may be truncated writing between 1 and 2147483645 bytes into a region of size 4096 [-Werror=format-truncation=]
109 | "%s/platforms/%s/arch-%s/usr/lib/%s",
| ^~
In file included from /usr/mipsel-linux-gnu/include/stdio.h:867,
from util/symbol.h:11,
from util/map.c:2:
/usr/mipsel-linux-gnu/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 32 or more bytes (assuming 4294967321) into a destination of size 4096
67 | return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
68 | __bos (__s), __fmt, __va_arg_pack ());
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29 29.14 fedora:20 : Ok gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-7)
30 30.66 fedora:22 : Ok gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6), clang version 3.5.0 (tags/RELEASE_350/final)
31 66.33 fedora:23 : Ok gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6), clang version 3.7.0 (tags/RELEASE_370/final)
32 77.51 fedora:24 : Ok gcc (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1), clang version 3.8.1 (tags/RELEASE_381/final)
33 25.23 fedora:24-x-ARC-uClibc : Ok arc-linux-gcc (ARCompact ISA Linux uClibc toolchain 2017.09-rc2) 7.1.1 20170710
34 79.68 fedora:25 : Ok gcc (GCC) 6.4.1 20170727 (Red Hat 6.4.1-1), clang version 3.9.1 (tags/RELEASE_391/final)
35 93.09 fedora:26 : Ok gcc (GCC) 7.3.1 20180130 (Red Hat 7.3.1-2), clang version 4.0.1 (tags/RELEASE_401/final)
36 94.12 fedora:27 : Ok gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-6), clang version 5.0.2 (tags/RELEASE_502/final)
37 101.97 fedora:28 : Ok gcc (GCC) 8.3.1 20190223 (Red Hat 8.3.1-2), clang version 6.0.1 (tags/RELEASE_601/final)
38 107.51 fedora:29 : Ok gcc (GCC) 8.3.1 20190223 (Red Hat 8.3.1-2), clang version 7.0.1 (Fedora 7.0.1-6.fc29)
39 111.24 fedora:30 : Ok gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2), clang version 8.0.0 (Fedora 8.0.0-3.fc30)
40 25.85 fedora:30-x-ARC-uClibc : Ok arc-linux-gcc (ARCv2 ISA Linux uClibc toolchain 2019.03-rc1) 8.3.1 20190225
41 110.61 fedora:31 : Ok gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2), clang version 9.0.1 (Fedora 9.0.1-4.fc31)
42 93.78 fedora:32 : Ok gcc (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6), clang version 10.0.1 (Fedora 10.0.1-3.fc32)
43 91.51 fedora:33 : Ok gcc (GCC) 10.2.1 20201125 (Red Hat 10.2.1-9), clang version 11.0.0 (Fedora 11.0.0-2.fc33)
44 92.75 fedora:34 : Ok gcc (GCC) 11.0.0 20210113 (Red Hat 11.0.0-0), clang version 11.0.1 (Fedora 11.0.1-4.fc34)
45 92.33 fedora:rawhide : Ok gcc (GCC) 11.0.0 20210109 (Red Hat 11.0.0-0), clang version 11.0.1 (Fedora 11.0.1-4.fc34)
46 33.58 gentoo-stage3-amd64:latest : Ok gcc (Gentoo 9.3.0-r1 p3) 9.3.0
47 66.03 mageia:5 : Ok gcc (GCC) 4.9.2, clang version 3.5.2 (tags/RELEASE_352/final)
48 84.73 mageia:6 : Ok gcc (Mageia 5.5.0-1.mga6) 5.5.0, clang version 3.9.1 (tags/RELEASE_391/final)
49 98.35 manjaro:latest : Ok gcc (GCC) 10.2.0, clang version 10.0.1
50 223.15 openmandriva:cooker : Ok gcc (GCC) 10.2.0 20200723 (OpenMandriva), OpenMandriva 11.0.0-1 clang version 11.0.0 (/builddir/build/BUILD/llvm-project-llvmorg-11.0.0/clang 63e22714ac938c6b537bd958f70680d3331a2030)
51 117.30 opensuse:15.0 : Ok gcc (SUSE Linux) 7.4.1 20190905 [gcc-7-branch revision 275407], clang version 5.0.1 (tags/RELEASE_501/final 312548)
52 124.82 opensuse:15.1 : Ok gcc (SUSE Linux) 7.5.0, clang version 7.0.1 (tags/RELEASE_701/final 349238)
53 113.33 opensuse:15.2 : Ok gcc (SUSE Linux) 7.5.0, clang version 9.0.1
54 106.17 opensuse:42.3 : Ok gcc (SUSE Linux) 4.8.5, clang version 3.8.0 (tags/RELEASE_380/final 262553)
55 108.15 opensuse:tumbleweed : Ok gcc (SUSE Linux) 10.2.1 20200825 [revision c0746a1beb1ba073c7981eb09f55b3d993b32e5c], clang version 10.0.1
56 25.57 oraclelinux:6 : Ok gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1)
57 30.86 oraclelinux:7 : Ok gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44.0.3)
58 91.75 oraclelinux:8 : Ok gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5.0.1), clang version 10.0.1 (Red Hat 10.0.1-1.0.1.module+el8.3.0+7827+89335dbf)
59 27.64 ubuntu:12.04 : Ok gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3, Ubuntu clang version 3.0-6ubuntu3 (tags/RELEASE_30/final) (based on LLVM 3.0)
60 29.65 ubuntu:14.04 : Ok gcc (Ubuntu 4.8.4-2ubuntu1~14.04.4) 4.8.4
61 75.65 ubuntu:16.04 : Ok gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609, clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)
62 25.57 ubuntu:16.04-x-arm : Ok arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
63 25.52 ubuntu:16.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
64 25.01 ubuntu:16.04-x-powerpc : Ok powerpc-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
65 25.51 ubuntu:16.04-x-powerpc64 : Ok powerpc64-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
66 25.70 ubuntu:16.04-x-powerpc64el : Ok powerpc64le-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
67 24.95 ubuntu:16.04-x-s390 : Ok s390x-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
68 87.96 ubuntu:18.04 : Ok gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0, clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)
69 27.40 ubuntu:18.04-x-arm : Ok arm-linux-gnueabihf-gcc (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 7.5.0
70 27.14 ubuntu:18.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 7.5.0
71 22.68 ubuntu:18.04-x-m68k : Ok m68k-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
72 26.52 ubuntu:18.04-x-powerpc : Ok powerpc-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
73 28.97 ubuntu:18.04-x-powerpc64 : Ok powerpc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
74 28.54 ubuntu:18.04-x-powerpc64el : Ok powerpc64le-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
75 163.57 ubuntu:18.04-x-riscv64 : Ok riscv64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
76 24.07 ubuntu:18.04-x-s390 : Ok s390x-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
77 26.77 ubuntu:18.04-x-sh4 : Ok sh4-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
78 24.00 ubuntu:18.04-x-sparc64 : Ok sparc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
79 69.36 ubuntu:19.10 : Ok gcc (Ubuntu 9.2.1-9ubuntu2) 9.2.1 20191008, clang version 8.0.1-3build1 (tags/RELEASE_801/final)
80 27.07 ubuntu:19.10-x-alpha : Ok alpha-linux-gnu-gcc (Ubuntu 9.2.1-9ubuntu1) 9.2.1 20191008
81 24.29 ubuntu:19.10-x-hppa : Ok hppa-linux-gnu-gcc (Ubuntu 9.2.1-9ubuntu1) 9.2.1 20191008
82 74.99 ubuntu:20.04 : Ok gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, clang version 10.0.0-4ubuntu1
83 30.49 ubuntu:20.04-x-powerpc64el : Ok powerpc64le-linux-gnu-gcc (Ubuntu 10.2.0-5ubuntu1~20.04) 10.2.0
84 73.54 ubuntu:20.10 : Ok gcc (Ubuntu 10.2.0-13ubuntu1) 10.2.0, Ubuntu clang version 11.0.0-2
$
# uname -a
Linux quaco 5.10.7-100.fc32.x86_64 #1 SMP Tue Jan 12 20:25:28 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
# git log --oneline -1
648b054a46
perf inject: Correct event attribute sizes
# perf version --build-options
perf version 5.11.rc3.g648b054a4647
dwarf: [ on ] # HAVE_DWARF_SUPPORT
dwarf_getlocations: [ on ] # HAVE_DWARF_GETLOCATIONS_SUPPORT
glibc: [ on ] # HAVE_GLIBC_SUPPORT
syscall_table: [ on ] # HAVE_SYSCALL_TABLE_SUPPORT
libbfd: [ on ] # HAVE_LIBBFD_SUPPORT
libelf: [ on ] # HAVE_LIBELF_SUPPORT
libnuma: [ on ] # HAVE_LIBNUMA_SUPPORT
numa_num_possible_cpus: [ on ] # HAVE_LIBNUMA_SUPPORT
libperl: [ on ] # HAVE_LIBPERL_SUPPORT
libpython: [ on ] # HAVE_LIBPYTHON_SUPPORT
libslang: [ on ] # HAVE_SLANG_SUPPORT
libcrypto: [ on ] # HAVE_LIBCRYPTO_SUPPORT
libunwind: [ on ] # HAVE_LIBUNWIND_SUPPORT
libdw-dwarf-unwind: [ on ] # HAVE_DWARF_SUPPORT
zlib: [ on ] # HAVE_ZLIB_SUPPORT
lzma: [ on ] # HAVE_LZMA_SUPPORT
get_cpuid: [ on ] # HAVE_AUXTRACE_SUPPORT
bpf: [ on ] # HAVE_LIBBPF_SUPPORT
aio: [ on ] # HAVE_AIO_SUPPORT
zstd: [ on ] # HAVE_ZSTD_SUPPORT
libpfm4: [ OFF ] # HAVE_LIBPFM
# perf test
1: vmlinux symtab matches kallsyms : Ok
2: Detect openat syscall event : Ok
3: Detect openat syscall event on all cpus : Ok
4: Read samples using the mmap interface : Ok
5: Test data source output : Ok
6: Parse event definition strings : Ok
7: Simple expression parser : Ok
8: PERF_RECORD_* events & perf_sample fields : Ok
9: Parse perf pmu format : Ok
10: PMU events :
10.1: PMU event table sanity : Ok
10.2: PMU event map aliases : Ok
10.3: Parsing of PMU event table metrics : Ok
10.4: Parsing of PMU event table metrics with fake PMUs : Ok
11: DSO data read : Ok
12: DSO data cache : Ok
13: DSO data reopen : Ok
14: Roundtrip evsel->name : Ok
15: Parse sched tracepoints fields : Ok
16: syscalls:sys_enter_openat event fields : Ok
17: Setup struct perf_event_attr : Ok
18: Match and link multiple hists : Ok
19: 'import perf' in python : Ok
20: Breakpoint overflow signal handler : Ok
21: Breakpoint overflow sampling : Ok
22: Breakpoint accounting : Ok
23: Watchpoint :
23.1: Read Only Watchpoint : Skip (missing hardware support)
23.2: Write Only Watchpoint : Ok
23.3: Read / Write Watchpoint : Ok
23.4: Modify Watchpoint : Ok
24: Number of exit events of a simple workload : Ok
25: Software clock events period values : Ok
26: Object code reading : Ok
27: Sample parsing : Ok
28: Use a dummy software event to keep tracking : Ok
29: Parse with no sample_id_all bit set : Ok
30: Filter hist entries : Ok
31: Lookup mmap thread : Ok
32: Share thread maps : Ok
33: Sort output of hist entries : Ok
34: Cumulate child hist entries : Ok
35: Track with sched_switch : Ok
36: Filter fds with revents mask in a fdarray : Ok
37: Add fd to a fdarray, making it autogrow : Ok
38: kmod_path__parse : Ok
39: Thread map : Ok
40: LLVM search and compile :
40.1: Basic BPF llvm compile : Ok
40.2: kbuild searching : Ok
40.3: Compile source for BPF prologue generation : Ok
40.4: Compile source for BPF relocation : Ok
41: Session topology : Ok
42: BPF filter :
42.1: Basic BPF filtering : Ok
42.2: BPF pinning : Ok
42.3: BPF prologue generation : Ok
42.4: BPF relocation checker : Ok
43: Synthesize thread map : Ok
44: Remove thread map : Ok
45: Synthesize cpu map : Ok
46: Synthesize stat config : Ok
47: Synthesize stat : Ok
48: Synthesize stat round : Ok
49: Synthesize attr update : Ok
50: Event times : Ok
51: Read backward ring buffer : Ok
52: Print cpu map : Ok
53: Merge cpu map : Ok
54: Probe SDT events : Ok
55: is_printable_array : Ok
56: Print bitmap : Ok
57: perf hooks : Ok
58: builtin clang support : Skip (not compiled in)
59: unit_number__scnprintf : Ok
60: mem2node : Ok
61: time utils : Ok
62: Test jit_write_elf : Ok
63: Test libpfm4 support : Skip (not compiled in)
64: Test api io : Ok
65: maps__merge_in : Ok
66: Demangle Java : Ok
67: Parse and process metrics : Ok
68: PE file support : Ok
69: Event expansion for cgroups : Ok
70: Convert perf time to TSC : Ok
71: x86 rdpmc : Ok
72: DWARF unwind : Ok
73: x86 instruction decoder - new instructions : Ok
74: Intel PT packet decoder : Ok
75: x86 bp modify : Ok
76: probe libc's inet_pton & backtrace it with ping : Ok
77: Use vfs_getname probe to get syscall args filenames : Ok
78: Check Arm CoreSight trace data recording and synthesized samples: Skip
79: perf stat metrics (shadow stat) test : Ok
80: build id cache operations : Ok
81: Add vfs_getname probe to get syscall args filenames : Ok
82: Check open filename arg using perf trace + vfs_getname : Ok
83: Zstd perf.data compression/decompression : Ok
$ make -C tools/perf build-test
make: Entering directory '/home/acme/git/perf/tools/perf'
- tarpkg: ./tests/perf-targz-src-pkg .
make_no_libpython_O: make NO_LIBPYTHON=1
make_no_sdt_O: make NO_SDT=1
make_tags_O: make tags
make_install_O: make install
make_install_bin_O: make install-bin
make_debug_O: make DEBUG=1
make_no_libdw_dwarf_unwind_O: make NO_LIBDW_DWARF_UNWIND=1
make_no_libelf_O: make NO_LIBELF=1
make_cscope_O: make cscope
make_no_backtrace_O: make NO_BACKTRACE=1
make_no_libnuma_O: make NO_LIBNUMA=1
make_no_ui_O: make NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
make_no_newt_O: make NO_NEWT=1
make_with_babeltrace_O: make LIBBABELTRACE=1
make_util_pmu_bison_o_O: make util/pmu-bison.o
make_no_libunwind_O: make NO_LIBUNWIND=1
make_no_libbpf_DEBUG_O: make NO_LIBBPF=1 DEBUG=1
make_doc_O: make doc
make_perf_o_O: make perf.o
make_no_gtk2_O: make NO_GTK2=1
make_with_clangllvm_O: make LIBCLANGLLVM=1
make_clean_all_O: make clean all
make_no_demangle_O: make NO_DEMANGLE=1
make_with_gtk2_O: make GTK2=1
make_util_map_o_O: make util/map.o
make_pure_O: make
make_no_libbionic_O: make NO_LIBBIONIC=1
make_no_libaudit_O: make NO_LIBAUDIT=1
make_no_libbpf_O: make NO_LIBBPF=1
make_install_prefix_slash_O: make install prefix=/tmp/krava/
make_help_O: make help
make_no_syscall_tbl_O: make NO_SYSCALL_TABLE=1
make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1
make_minimal_O: make NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 NO_LIBCAP=1 NO_SYSCALL_TABLE=1
make_no_libcrypto_O: make NO_LIBCRYPTO=1
make_static_O: make LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1
make_install_prefix_O: make install prefix=/tmp/krava
make_no_auxtrace_O: make NO_AUXTRACE=1
make_with_libpfm4_O: make LIBPFM4=1
make_no_libperl_O: make NO_LIBPERL=1
make_no_slang_O: make NO_SLANG=1
OK
make: Leaving directory '/home/acme/git/perf/tools/perf'
$
-----BEGIN PGP SIGNATURE-----
iHUEABYIAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYASljgAKCRCyPKLppCJ+
J/E/AQCOGFqF7UmEzuuTecWeeBNCwVyD3woHLU13ll/e5VLNggD/YD9t8CZS+vwy
21yL4/yXZloLFE48OCLRNWeq91FL/gs=
=uZDD
-----END PGP SIGNATURE-----
Merge tag 'perf-tools-fixes-2021-01-17' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo:
- Fix 'CPU too large' error in Intel PT
- Correct event attribute sizes in 'perf inject'
- Sync build_bug.h and kvm.h kernel copies
- Fix bpf.h header include directive in 5sec.c 'perf trace' bpf example
- libbpf tests fixes
- Fix shadow stat 'perf test' for non-bash shells
- Take cgroups into account for shadow stats in 'perf stat'
* tag 'perf-tools-fixes-2021-01-17' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf inject: Correct event attribute sizes
perf intel-pt: Fix 'CPU too large' error
perf stat: Take cgroups into account for shadow stats
perf stat: Introduce struct runtime_stat_data
libperf tests: Fail when failing to get a tracepoint id
libperf tests: If a test fails return non-zero
libperf tests: Avoid uninitialized variable warning
perf test: Fix shadow stat test for non-bash shells
tools headers: Syncronize linux/build_bug.h with the kernel sources
tools headers UAPI: Sync kvm.h headers with the kernel sources
perf bpf examples: Fix bpf.h header include directive in 5sec.c example
This commit is contained in:
commit
e2da783614
12 changed files with 224 additions and 208 deletions
|
@ -79,9 +79,4 @@
|
|||
#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
|
||||
#endif // static_assert
|
||||
|
||||
#ifdef __GENKSYMS__
|
||||
/* genksyms gets confused by _Static_assert */
|
||||
#define _Static_assert(expr, ...)
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_BUILD_BUG_H */
|
||||
|
|
|
@ -251,6 +251,7 @@ struct kvm_hyperv_exit {
|
|||
#define KVM_EXIT_X86_RDMSR 29
|
||||
#define KVM_EXIT_X86_WRMSR 30
|
||||
#define KVM_EXIT_DIRTY_RING_FULL 31
|
||||
#define KVM_EXIT_AP_RESET_HOLD 32
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
/* Emulate instruction failed. */
|
||||
|
@ -573,6 +574,7 @@ struct kvm_vapic_addr {
|
|||
#define KVM_MP_STATE_CHECK_STOP 6
|
||||
#define KVM_MP_STATE_OPERATING 7
|
||||
#define KVM_MP_STATE_LOAD 8
|
||||
#define KVM_MP_STATE_AP_RESET_HOLD 9
|
||||
|
||||
struct kvm_mp_state {
|
||||
__u32 mp_state;
|
||||
|
|
|
@ -27,5 +27,5 @@ int main(int argc, char **argv)
|
|||
perf_cpu_map__put(cpus);
|
||||
|
||||
__T_END;
|
||||
return 0;
|
||||
return tests_failed == 0 ? 0 : -1;
|
||||
}
|
||||
|
|
|
@ -208,13 +208,13 @@ static int test_mmap_thread(void)
|
|||
char path[PATH_MAX];
|
||||
int id, err, pid, go_pipe[2];
|
||||
union perf_event *event;
|
||||
char bf;
|
||||
int count = 0;
|
||||
|
||||
snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
|
||||
sysfs__mountpoint());
|
||||
|
||||
if (filename__read_int(path, &id)) {
|
||||
tests_failed++;
|
||||
fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
|
||||
return -1;
|
||||
}
|
||||
|
@ -229,6 +229,7 @@ static int test_mmap_thread(void)
|
|||
pid = fork();
|
||||
if (!pid) {
|
||||
int i;
|
||||
char bf;
|
||||
|
||||
read(go_pipe[0], &bf, 1);
|
||||
|
||||
|
@ -266,7 +267,7 @@ static int test_mmap_thread(void)
|
|||
perf_evlist__enable(evlist);
|
||||
|
||||
/* kick the child and wait for it to finish */
|
||||
write(go_pipe[1], &bf, 1);
|
||||
write(go_pipe[1], "A", 1);
|
||||
waitpid(pid, NULL, 0);
|
||||
|
||||
/*
|
||||
|
@ -409,5 +410,5 @@ int main(int argc, char **argv)
|
|||
test_mmap_cpus();
|
||||
|
||||
__T_END;
|
||||
return 0;
|
||||
return tests_failed == 0 ? 0 : -1;
|
||||
}
|
||||
|
|
|
@ -131,5 +131,5 @@ int main(int argc, char **argv)
|
|||
test_stat_thread_enable();
|
||||
|
||||
__T_END;
|
||||
return 0;
|
||||
return tests_failed == 0 ? 0 : -1;
|
||||
}
|
||||
|
|
|
@ -27,5 +27,5 @@ int main(int argc, char **argv)
|
|||
perf_thread_map__put(threads);
|
||||
|
||||
__T_END;
|
||||
return 0;
|
||||
return tests_failed == 0 ? 0 : -1;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
|
||||
*/
|
||||
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf.h>
|
||||
|
||||
#define NSEC_PER_SEC 1000000000L
|
||||
|
||||
|
|
|
@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2
|
|||
|
||||
test_global_aggr()
|
||||
{
|
||||
local cyc
|
||||
|
||||
perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \
|
||||
grep -e cycles -e instructions | \
|
||||
while read num evt hash ipc rest
|
||||
do
|
||||
# skip not counted events
|
||||
if [[ $num == "<not" ]]; then
|
||||
if [ "$num" = "<not" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# save cycles count
|
||||
if [[ $evt == "cycles" ]]; then
|
||||
if [ "$evt" = "cycles" ]; then
|
||||
cyc=$num
|
||||
continue
|
||||
fi
|
||||
|
||||
# skip if no cycles
|
||||
if [[ -z $cyc ]]; then
|
||||
if [ -z "$cyc" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# use printf for rounding and a leading zero
|
||||
local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
|
||||
if [[ $ipc != $res ]]; then
|
||||
res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
|
||||
if [ "$ipc" != "$res" ]; then
|
||||
echo "IPC is different: $res != $ipc ($num / $cyc)"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -42,32 +40,32 @@ test_global_aggr()
|
|||
|
||||
test_no_aggr()
|
||||
{
|
||||
declare -A results
|
||||
|
||||
perf stat -a -A --no-big-num -e cycles,instructions sleep 1 2>&1 | \
|
||||
grep ^CPU | \
|
||||
while read cpu num evt hash ipc rest
|
||||
do
|
||||
# skip not counted events
|
||||
if [[ $num == "<not" ]]; then
|
||||
if [ "$num" = "<not" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# save cycles count
|
||||
if [[ $evt == "cycles" ]]; then
|
||||
results[$cpu]=$num
|
||||
if [ "$evt" = "cycles" ]; then
|
||||
results="$results $cpu:$num"
|
||||
continue
|
||||
fi
|
||||
|
||||
cyc=${results##* $cpu:}
|
||||
cyc=${cyc%% *}
|
||||
|
||||
# skip if no cycles
|
||||
local cyc=${results[$cpu]}
|
||||
if [[ -z $cyc ]]; then
|
||||
if [ -z "$cyc" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# use printf for rounding and a leading zero
|
||||
local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
|
||||
if [[ $ipc != $res ]]; then
|
||||
res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
|
||||
if [ "$ipc" != "$res" ]; then
|
||||
echo "IPC is different for $cpu: $res != $ipc ($num / $cyc)"
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session,
|
|||
attr_offset = lseek(ff.fd, 0, SEEK_CUR);
|
||||
|
||||
evlist__for_each_entry(evlist, evsel) {
|
||||
if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
|
||||
/*
|
||||
* We are likely in "perf inject" and have read
|
||||
* from an older file. Update attr size so that
|
||||
* reader gets the right offset to the ids.
|
||||
*/
|
||||
evsel->core.attr.size = sizeof(evsel->core.attr);
|
||||
}
|
||||
f_attr = (struct perf_file_attr){
|
||||
.attr = evsel->core.attr,
|
||||
.ids = {
|
||||
|
|
|
@ -2980,7 +2980,7 @@ int machines__for_each_thread(struct machines *machines,
|
|||
|
||||
pid_t machine__get_current_tid(struct machine *machine, int cpu)
|
||||
{
|
||||
int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
|
||||
int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS);
|
||||
|
||||
if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid)
|
||||
return -1;
|
||||
|
@ -2992,7 +2992,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
|
|||
pid_t tid)
|
||||
{
|
||||
struct thread *thread;
|
||||
int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
|
||||
int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS);
|
||||
|
||||
if (cpu < 0)
|
||||
return -EINVAL;
|
||||
|
|
|
@ -2404,7 +2404,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
|
|||
{
|
||||
int i, err = -1;
|
||||
struct perf_cpu_map *map;
|
||||
int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS);
|
||||
int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
|
||||
|
||||
for (i = 0; i < PERF_TYPE_MAX; ++i) {
|
||||
struct evsel *evsel;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "evlist.h"
|
||||
#include "expr.h"
|
||||
#include "metricgroup.h"
|
||||
#include "cgroup.h"
|
||||
#include <linux/zalloc.h>
|
||||
|
||||
/*
|
||||
|
@ -28,6 +29,7 @@ struct saved_value {
|
|||
enum stat_type type;
|
||||
int ctx;
|
||||
int cpu;
|
||||
struct cgroup *cgrp;
|
||||
struct runtime_stat *stat;
|
||||
struct stats stats;
|
||||
u64 metric_total;
|
||||
|
@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
|
|||
if (a->ctx != b->ctx)
|
||||
return a->ctx - b->ctx;
|
||||
|
||||
if (a->cgrp != b->cgrp)
|
||||
return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
|
||||
|
||||
if (a->evsel == NULL && b->evsel == NULL) {
|
||||
if (a->stat == b->stat)
|
||||
return 0;
|
||||
|
@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
|
|||
bool create,
|
||||
enum stat_type type,
|
||||
int ctx,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct cgroup *cgrp)
|
||||
{
|
||||
struct rblist *rblist;
|
||||
struct rb_node *nd;
|
||||
|
@ -110,10 +116,15 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
|
|||
.type = type,
|
||||
.ctx = ctx,
|
||||
.stat = st,
|
||||
.cgrp = cgrp,
|
||||
};
|
||||
|
||||
rblist = &st->value_list;
|
||||
|
||||
/* don't use context info for clock events */
|
||||
if (type == STAT_NSECS)
|
||||
dm.ctx = 0;
|
||||
|
||||
nd = rblist__find(rblist, &dm);
|
||||
if (nd)
|
||||
return container_of(nd, struct saved_value, rb_node);
|
||||
|
@ -191,12 +202,18 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
|
|||
reset_stat(st);
|
||||
}
|
||||
|
||||
struct runtime_stat_data {
|
||||
int ctx;
|
||||
struct cgroup *cgrp;
|
||||
};
|
||||
|
||||
static void update_runtime_stat(struct runtime_stat *st,
|
||||
enum stat_type type,
|
||||
int ctx, int cpu, u64 count)
|
||||
int cpu, u64 count,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
struct saved_value *v = saved_value_lookup(NULL, cpu, true,
|
||||
type, ctx, st);
|
||||
struct saved_value *v = saved_value_lookup(NULL, cpu, true, type,
|
||||
rsd->ctx, st, rsd->cgrp);
|
||||
|
||||
if (v)
|
||||
update_stats(&v->stats, count);
|
||||
|
@ -210,82 +227,86 @@ static void update_runtime_stat(struct runtime_stat *st,
|
|||
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
|
||||
int cpu, struct runtime_stat *st)
|
||||
{
|
||||
int ctx = evsel_context(counter);
|
||||
u64 count_ns = count;
|
||||
struct saved_value *v;
|
||||
struct runtime_stat_data rsd = {
|
||||
.ctx = evsel_context(counter),
|
||||
.cgrp = counter->cgrp,
|
||||
};
|
||||
|
||||
count *= counter->scale;
|
||||
|
||||
if (evsel__is_clock(counter))
|
||||
update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
|
||||
update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd);
|
||||
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
|
||||
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
|
||||
update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
|
||||
update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, ELISION_START))
|
||||
update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
|
||||
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
|
||||
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
|
||||
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
|
||||
ctx, cpu, count);
|
||||
cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
|
||||
update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
|
||||
update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
|
||||
update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
|
||||
update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
|
||||
update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
|
||||
update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd);
|
||||
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
|
||||
update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, SMI_NUM))
|
||||
update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd);
|
||||
else if (perf_stat_evsel__is(counter, APERF))
|
||||
update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
|
||||
update_runtime_stat(st, STAT_APERF, cpu, count, &rsd);
|
||||
|
||||
if (counter->collect_stat) {
|
||||
v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
|
||||
v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st,
|
||||
rsd.cgrp);
|
||||
update_stats(&v->stats, count);
|
||||
if (counter->metric_leader)
|
||||
v->metric_total += count;
|
||||
} else if (counter->metric_leader) {
|
||||
v = saved_value_lookup(counter->metric_leader,
|
||||
cpu, true, STAT_NONE, 0, st);
|
||||
cpu, true, STAT_NONE, 0, st, rsd.cgrp);
|
||||
v->metric_total += count;
|
||||
v->metric_other++;
|
||||
}
|
||||
|
@ -422,11 +443,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
|
|||
}
|
||||
|
||||
static double runtime_stat_avg(struct runtime_stat *st,
|
||||
enum stat_type type, int ctx, int cpu)
|
||||
enum stat_type type, int cpu,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
struct saved_value *v;
|
||||
|
||||
v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
|
||||
v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
|
||||
if (!v)
|
||||
return 0.0;
|
||||
|
||||
|
@ -434,11 +456,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
|
|||
}
|
||||
|
||||
static double runtime_stat_n(struct runtime_stat *st,
|
||||
enum stat_type type, int ctx, int cpu)
|
||||
enum stat_type type, int cpu,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
struct saved_value *v;
|
||||
|
||||
v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
|
||||
v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
|
||||
if (!v)
|
||||
return 0.0;
|
||||
|
||||
|
@ -446,16 +469,15 @@ static double runtime_stat_n(struct runtime_stat *st,
|
|||
}
|
||||
|
||||
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel, double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -470,16 +492,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static void print_stalled_cycles_backend(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel, double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -490,17 +511,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static void print_branch_misses(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel,
|
||||
double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -511,18 +530,15 @@ static void print_branch_misses(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static void print_l1_dcache_misses(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel,
|
||||
double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -533,18 +549,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static void print_l1_icache_misses(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel,
|
||||
double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -554,17 +567,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static void print_dtlb_cache_misses(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel,
|
||||
double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -574,17 +585,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static void print_itlb_cache_misses(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel,
|
||||
double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -594,17 +603,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static void print_ll_cache_misses(struct perf_stat_config *config,
|
||||
int cpu,
|
||||
struct evsel *evsel,
|
||||
double avg,
|
||||
int cpu, double avg,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double total, ratio = 0.0;
|
||||
const char *color;
|
||||
int ctx = evsel_context(evsel);
|
||||
|
||||
total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg / total * 100.0;
|
||||
|
@ -662,56 +669,61 @@ static double sanitize_val(double x)
|
|||
return x;
|
||||
}
|
||||
|
||||
static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
|
||||
static double td_total_slots(int cpu, struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
|
||||
return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd);
|
||||
}
|
||||
|
||||
static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
|
||||
static double td_bad_spec(int cpu, struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double bad_spec = 0;
|
||||
double total_slots;
|
||||
double total;
|
||||
|
||||
total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
|
||||
runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
|
||||
runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) -
|
||||
runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) +
|
||||
runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd);
|
||||
|
||||
total_slots = td_total_slots(ctx, cpu, st);
|
||||
total_slots = td_total_slots(cpu, st, rsd);
|
||||
if (total_slots)
|
||||
bad_spec = total / total_slots;
|
||||
return sanitize_val(bad_spec);
|
||||
}
|
||||
|
||||
static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
|
||||
static double td_retiring(int cpu, struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double retiring = 0;
|
||||
double total_slots = td_total_slots(ctx, cpu, st);
|
||||
double total_slots = td_total_slots(cpu, st, rsd);
|
||||
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
|
||||
ctx, cpu);
|
||||
cpu, rsd);
|
||||
|
||||
if (total_slots)
|
||||
retiring = ret_slots / total_slots;
|
||||
return retiring;
|
||||
}
|
||||
|
||||
static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
|
||||
static double td_fe_bound(int cpu, struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double fe_bound = 0;
|
||||
double total_slots = td_total_slots(ctx, cpu, st);
|
||||
double total_slots = td_total_slots(cpu, st, rsd);
|
||||
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
|
||||
ctx, cpu);
|
||||
cpu, rsd);
|
||||
|
||||
if (total_slots)
|
||||
fe_bound = fetch_bub / total_slots;
|
||||
return fe_bound;
|
||||
}
|
||||
|
||||
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
|
||||
static double td_be_bound(int cpu, struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double sum = (td_fe_bound(ctx, cpu, st) +
|
||||
td_bad_spec(ctx, cpu, st) +
|
||||
td_retiring(ctx, cpu, st));
|
||||
double sum = (td_fe_bound(cpu, st, rsd) +
|
||||
td_bad_spec(cpu, st, rsd) +
|
||||
td_retiring(cpu, st, rsd));
|
||||
if (sum == 0)
|
||||
return 0;
|
||||
return sanitize_val(1.0 - sum);
|
||||
|
@ -722,15 +734,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
|
|||
* the ratios we need to recreate the sum.
|
||||
*/
|
||||
|
||||
static double td_metric_ratio(int ctx, int cpu,
|
||||
enum stat_type type,
|
||||
struct runtime_stat *stat)
|
||||
static double td_metric_ratio(int cpu, enum stat_type type,
|
||||
struct runtime_stat *stat,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
|
||||
runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
|
||||
runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
|
||||
runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
|
||||
double d = runtime_stat_avg(stat, type, ctx, cpu);
|
||||
double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) +
|
||||
runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) +
|
||||
runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) +
|
||||
runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd);
|
||||
double d = runtime_stat_avg(stat, type, cpu, rsd);
|
||||
|
||||
if (sum)
|
||||
return d / sum;
|
||||
|
@ -742,34 +754,33 @@ static double td_metric_ratio(int ctx, int cpu,
|
|||
* We allow two missing.
|
||||
*/
|
||||
|
||||
static bool full_td(int ctx, int cpu,
|
||||
struct runtime_stat *stat)
|
||||
static bool full_td(int cpu, struct runtime_stat *stat,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
int c = 0;
|
||||
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0)
|
||||
c++;
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0)
|
||||
c++;
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0)
|
||||
c++;
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
|
||||
if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0)
|
||||
c++;
|
||||
return c >= 2;
|
||||
}
|
||||
|
||||
static void print_smi_cost(struct perf_stat_config *config,
|
||||
int cpu, struct evsel *evsel,
|
||||
static void print_smi_cost(struct perf_stat_config *config, int cpu,
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct runtime_stat *st)
|
||||
struct runtime_stat *st,
|
||||
struct runtime_stat_data *rsd)
|
||||
{
|
||||
double smi_num, aperf, cycles, cost = 0.0;
|
||||
int ctx = evsel_context(evsel);
|
||||
const char *color = NULL;
|
||||
|
||||
smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
|
||||
aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
|
||||
cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
|
||||
smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd);
|
||||
aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd);
|
||||
cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
|
||||
|
||||
if ((cycles == 0) || (aperf == 0))
|
||||
return;
|
||||
|
@ -804,7 +815,8 @@ static int prepare_metric(struct evsel **metric_events,
|
|||
scale = 1e-9;
|
||||
} else {
|
||||
v = saved_value_lookup(metric_events[i], cpu, false,
|
||||
STAT_NONE, 0, st);
|
||||
STAT_NONE, 0, st,
|
||||
metric_events[i]->cgrp);
|
||||
if (!v)
|
||||
break;
|
||||
stats = &v->stats;
|
||||
|
@ -930,12 +942,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
print_metric_t print_metric = out->print_metric;
|
||||
double total, ratio = 0.0, total2;
|
||||
const char *color = NULL;
|
||||
int ctx = evsel_context(evsel);
|
||||
struct runtime_stat_data rsd = {
|
||||
.ctx = evsel_context(evsel),
|
||||
.cgrp = evsel->cgrp,
|
||||
};
|
||||
struct metric_event *me;
|
||||
int num = 1;
|
||||
|
||||
if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
|
||||
|
||||
if (total) {
|
||||
ratio = avg / total;
|
||||
|
@ -945,12 +960,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
|
||||
}
|
||||
|
||||
total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
|
||||
ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd);
|
||||
|
||||
total = max(total, runtime_stat_avg(st,
|
||||
STAT_STALLED_CYCLES_BACK,
|
||||
ctx, cpu));
|
||||
cpu, &rsd));
|
||||
|
||||
if (total && avg) {
|
||||
out->new_line(config, ctxp);
|
||||
|
@ -960,8 +974,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
ratio);
|
||||
}
|
||||
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
|
||||
if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
|
||||
print_branch_misses(config, cpu, evsel, avg, out, st);
|
||||
if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0)
|
||||
print_branch_misses(config, cpu, avg, out, st, &rsd);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
|
||||
} else if (
|
||||
|
@ -970,8 +984,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
||||
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
|
||||
|
||||
if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
|
||||
print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
|
||||
if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0)
|
||||
print_l1_dcache_misses(config, cpu, avg, out, st, &rsd);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
|
||||
} else if (
|
||||
|
@ -980,8 +994,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
||||
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
|
||||
|
||||
if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
|
||||
print_l1_icache_misses(config, cpu, evsel, avg, out, st);
|
||||
if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0)
|
||||
print_l1_icache_misses(config, cpu, avg, out, st, &rsd);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
|
||||
} else if (
|
||||
|
@ -990,8 +1004,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
||||
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
|
||||
|
||||
if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
|
||||
print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
|
||||
if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0)
|
||||
print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
|
||||
} else if (
|
||||
|
@ -1000,8 +1014,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
||||
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
|
||||
|
||||
if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
|
||||
print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
|
||||
if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0)
|
||||
print_itlb_cache_misses(config, cpu, avg, out, st, &rsd);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
|
||||
} else if (
|
||||
|
@ -1010,27 +1024,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
||||
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
|
||||
|
||||
if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
|
||||
print_ll_cache_misses(config, cpu, evsel, avg, out, st);
|
||||
if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0)
|
||||
print_ll_cache_misses(config, cpu, avg, out, st, &rsd);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
|
||||
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
|
||||
total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd);
|
||||
|
||||
if (total)
|
||||
ratio = avg * 100 / total;
|
||||
|
||||
if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
|
||||
if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0)
|
||||
print_metric(config, ctxp, NULL, "%8.3f %%",
|
||||
"of all cache refs", ratio);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
|
||||
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
|
||||
print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
|
||||
print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd);
|
||||
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
|
||||
print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
|
||||
print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd);
|
||||
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
|
||||
total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
|
||||
total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
|
||||
|
||||
if (total) {
|
||||
ratio = avg / total;
|
||||
|
@ -1039,7 +1053,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
|
||||
}
|
||||
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
|
||||
|
||||
if (total)
|
||||
print_metric(config, ctxp, NULL,
|
||||
|
@ -1049,8 +1063,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
|
||||
0);
|
||||
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
|
||||
total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
|
||||
total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
|
||||
|
||||
if (total2 < avg)
|
||||
total2 = avg;
|
||||
|
@ -1060,21 +1074,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
|
||||
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
|
||||
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
|
||||
ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
|
||||
|
||||
if (avg)
|
||||
ratio = total / avg;
|
||||
|
||||
if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
|
||||
if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0)
|
||||
print_metric(config, ctxp, NULL, "%8.0f",
|
||||
"cycles / transaction", ratio);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
|
||||
0);
|
||||
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
|
||||
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
|
||||
ctx, cpu);
|
||||
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
|
||||
|
||||
if (avg)
|
||||
ratio = total / avg;
|
||||
|
@ -1087,28 +1099,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
else
|
||||
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
|
||||
double fe_bound = td_fe_bound(ctx, cpu, st);
|
||||
double fe_bound = td_fe_bound(cpu, st, &rsd);
|
||||
|
||||
if (fe_bound > 0.2)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
|
||||
fe_bound * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
|
||||
double retiring = td_retiring(ctx, cpu, st);
|
||||
double retiring = td_retiring(cpu, st, &rsd);
|
||||
|
||||
if (retiring > 0.7)
|
||||
color = PERF_COLOR_GREEN;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
|
||||
retiring * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
|
||||
double bad_spec = td_bad_spec(ctx, cpu, st);
|
||||
double bad_spec = td_bad_spec(cpu, st, &rsd);
|
||||
|
||||
if (bad_spec > 0.1)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
|
||||
bad_spec * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
|
||||
double be_bound = td_be_bound(ctx, cpu, st);
|
||||
double be_bound = td_be_bound(cpu, st, &rsd);
|
||||
const char *name = "backend bound";
|
||||
static int have_recovery_bubbles = -1;
|
||||
|
||||
|
@ -1121,43 +1133,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
|
||||
if (be_bound > 0.2)
|
||||
color = PERF_COLOR_RED;
|
||||
if (td_total_slots(ctx, cpu, st) > 0)
|
||||
if (td_total_slots(cpu, st, &rsd) > 0)
|
||||
print_metric(config, ctxp, color, "%8.1f%%", name,
|
||||
be_bound * 100.);
|
||||
else
|
||||
print_metric(config, ctxp, NULL, NULL, name, 0);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
|
||||
full_td(ctx, cpu, st)) {
|
||||
double retiring = td_metric_ratio(ctx, cpu,
|
||||
STAT_TOPDOWN_RETIRING, st);
|
||||
|
||||
full_td(cpu, st, &rsd)) {
|
||||
double retiring = td_metric_ratio(cpu,
|
||||
STAT_TOPDOWN_RETIRING, st,
|
||||
&rsd);
|
||||
if (retiring > 0.7)
|
||||
color = PERF_COLOR_GREEN;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
|
||||
retiring * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
|
||||
full_td(ctx, cpu, st)) {
|
||||
double fe_bound = td_metric_ratio(ctx, cpu,
|
||||
STAT_TOPDOWN_FE_BOUND, st);
|
||||
|
||||
full_td(cpu, st, &rsd)) {
|
||||
double fe_bound = td_metric_ratio(cpu,
|
||||
STAT_TOPDOWN_FE_BOUND, st,
|
||||
&rsd);
|
||||
if (fe_bound > 0.2)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
|
||||
fe_bound * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
|
||||
full_td(ctx, cpu, st)) {
|
||||
double be_bound = td_metric_ratio(ctx, cpu,
|
||||
STAT_TOPDOWN_BE_BOUND, st);
|
||||
|
||||
full_td(cpu, st, &rsd)) {
|
||||
double be_bound = td_metric_ratio(cpu,
|
||||
STAT_TOPDOWN_BE_BOUND, st,
|
||||
&rsd);
|
||||
if (be_bound > 0.2)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
|
||||
be_bound * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
|
||||
full_td(ctx, cpu, st)) {
|
||||
double bad_spec = td_metric_ratio(ctx, cpu,
|
||||
STAT_TOPDOWN_BAD_SPEC, st);
|
||||
|
||||
full_td(cpu, st, &rsd)) {
|
||||
double bad_spec = td_metric_ratio(cpu,
|
||||
STAT_TOPDOWN_BAD_SPEC, st,
|
||||
&rsd);
|
||||
if (bad_spec > 0.1)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
|
||||
|
@ -1165,11 +1177,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
} else if (evsel->metric_expr) {
|
||||
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
|
||||
evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
|
||||
} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
|
||||
} else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
|
||||
char unit = 'M';
|
||||
char unit_buf[10];
|
||||
|
||||
total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
|
||||
total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
|
||||
|
||||
if (total)
|
||||
ratio = 1000.0 * avg / total;
|
||||
|
@ -1180,7 +1192,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
|
||||
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
|
||||
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
|
||||
print_smi_cost(config, cpu, evsel, out, st);
|
||||
print_smi_cost(config, cpu, out, st, &rsd);
|
||||
} else {
|
||||
num = 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue