From 70303420b5721c38998cf987e6b7d30cc62d4ff1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Jun 2018 13:20:53 -0400 Subject: [PATCH 1/5] tracing: Check for no filter when processing event filters The syzkaller detected a out-of-bounds issue with the events filter code, specifically here: prog[N].pred = NULL; /* #13 */ prog[N].target = 1; /* TRUE */ prog[N+1].pred = NULL; prog[N+1].target = 0; /* FALSE */ -> prog[N-1].target = N; prog[N-1].when_to_branch = false; As that's the first reference to a "N-1" index, it appears that the code got here with N = 0, which means the filter parser found no filter to parse (which shouldn't ever happen, but apparently it did). Add a new error to the parsing code that will check to make sure that N is not zero before going into this part of the code. If N = 0, then -EINVAL is returned, and a error message is added to the filter. Cc: stable@vger.kernel.org Fixes: 80765597bc587 ("tracing: Rewrite filter logic to be simpler and faster") Reported-by: air icy bugzilla url: https://bugzilla.kernel.org/show_bug.cgi?id=200019 Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1c818dbc0d7..0dceb77d1d42 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -78,7 +78,8 @@ static const char * ops[] = { OPS }; C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \ C(INVALID_FILTER, "Meaningless filter expression"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ - C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), + C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ + C(NO_FILTER, "No filter found"), #undef C #define C(a, b) FILT_ERR_##a @@ -550,6 +551,13 @@ predicate_parse(const char *str, int nr_parens, int nr_preds, goto out_free; } + if (!N) { + /* No program? */ + ret = -EINVAL; + parse_error(pe, FILT_ERR_NO_FILTER, ptr - str); + goto out_free; + } + prog[N].pred = NULL; /* #13 */ prog[N].target = 1; /* TRUE */ prog[N+1].pred = NULL; From 1a63dcd8765bc8680481dc2f9acf6ef13cee6d27 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 7 Jun 2018 13:11:43 -0700 Subject: [PATCH 2/5] softirq: Reorder trace_softirqs_on to prevent lockdep splat I'm able to reproduce a lockdep splat with config options: CONFIG_PROVE_LOCKING=y, CONFIG_DEBUG_LOCK_ALLOC=y and CONFIG_PREEMPTIRQ_EVENTS=y $ echo 1 > /d/tracing/events/preemptirq/preempt_enable/enable [ 26.112609] DEBUG_LOCKS_WARN_ON(current->softirqs_enabled) [ 26.112636] WARNING: CPU: 0 PID: 118 at kernel/locking/lockdep.c:3854 [...] [ 26.144229] Call Trace: [ 26.144926] [ 26.145506] lock_acquire+0x55/0x1b0 [ 26.146499] ? __do_softirq+0x46f/0x4d9 [ 26.147571] ? __do_softirq+0x46f/0x4d9 [ 26.148646] trace_preempt_on+0x8f/0x240 [ 26.149744] ? trace_preempt_on+0x4d/0x240 [ 26.150862] ? __do_softirq+0x46f/0x4d9 [ 26.151930] preempt_count_sub+0x18a/0x1a0 [ 26.152985] __do_softirq+0x46f/0x4d9 [ 26.153937] irq_exit+0x68/0xe0 [ 26.154755] smp_apic_timer_interrupt+0x271/0x280 [ 26.156056] apic_timer_interrupt+0xf/0x20 [ 26.157105] The issue was this: preempt_count = 1 << SOFTIRQ_SHIFT __local_bh_enable(cnt = 1 << SOFTIRQ_SHIFT) { if (softirq_count() == (cnt && SOFTIRQ_MASK)) { trace_softirqs_on() { current->softirqs_enabled = 1; } } preempt_count_sub(cnt) { trace_preempt_on() { tracepoint() { rcu_read_lock_sched() { // jumps into lockdep Where preempt_count still has softirqs disabled, but current->softirqs_enabled is true, and we get a splat. Link: http://lkml.kernel.org/r/20180607201143.247775-1-joel@joelfernandes.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Tom Zanussi Cc: Namhyung Kim Cc: Thomas Glexiner Cc: Boqun Feng Cc: Paul McKenney Cc: Masami Hiramatsu Cc: Todd Kjos Cc: Erick Reyes Cc: Julia Cartwright Cc: Byungchul Park Cc: stable@vger.kernel.org Reviewed-by: Steven Rostedt (VMware) Fixes: d59158162e032 ("tracing: Add support for preempt and irq enable/disable events") Signed-off-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- kernel/softirq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index de2f57fddc04..900dcfee542c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -139,9 +139,13 @@ static void __local_bh_enable(unsigned int cnt) { lockdep_assert_irqs_disabled(); + if (preempt_count() == cnt) + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); + if (softirq_count() == (cnt & SOFTIRQ_MASK)) trace_softirqs_on(_RET_IP_); - preempt_count_sub(cnt); + + __preempt_count_sub(cnt); } /* From 08ae88f8104f486fd4103854119169f3e55dbc4c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 9 Feb 2018 11:53:16 -0600 Subject: [PATCH 3/5] tracing: Use swap macro in update_max_tr Make use of the swap macro and remove unnecessary variable _buf_. This makes the code easier to read and maintain. Also, reduces the stack usage. This code was detected with the help of Coccinelle. Link: http://lkml.kernel.org/r/20180209175316.GA18720@embeddedgus Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c9336e98ac59..a0079b4c7a49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1360,8 +1360,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct ring_buffer *buf; - if (tr->stop_count) return; @@ -1375,9 +1373,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); - buf = tr->trace_buffer.buffer; - tr->trace_buffer.buffer = tr->max_buffer.buffer; - tr->max_buffer.buffer = buf; + swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); From 064f35a952246c60e956717dfc5782c48f174e74 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 14 Jun 2018 15:48:59 -0700 Subject: [PATCH 4/5] tracing: Fix some errors in histogram documentation Fix typos, inconsistencies in using quotes, incorrect section number, etc. in the trace histogram documentation. Link: http://lkml.kernel.org/r/20180614224859.55864-1-joel@joelfernandes.org Reviewed-by: Masami Hiramatsu Acked-by: Tom Zanussi Signed-off-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/histogram.txt | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt index e73bcf9cb5f3..7ffea6aa22e3 100644 --- a/Documentation/trace/histogram.txt +++ b/Documentation/trace/histogram.txt @@ -1729,35 +1729,35 @@ If a variable isn't a key variable or prefixed with 'vals=', the associated event field will be saved in a variable but won't be summed as a value: - # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger + # echo 'hist:keys=next_pid:ts1=common_timestamp ...' >> event/trigger Multiple variables can be assigned at the same time. The below would result in both ts0 and b being created as variables, with both common_timestamp and field1 additionally being summed as values: - # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \ + # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ...' >> \ event/trigger Note that variable assignments can appear either preceding or following their use. The command below behaves identically to the command above: - # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \ + # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ...' >> \ event/trigger Any number of variables not bound to a 'vals=' prefix can also be assigned by simply separating them with colons. Below is the same thing but without the values being summed in the histogram: - # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger + # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ...' >> event/trigger Variables set as above can be referenced and used in expressions on another event. For example, here's how a latency can be calculated: - # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger - # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger + # echo 'hist:keys=pid,prio:ts0=common_timestamp ...' >> event1/trigger + # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...' >> event2/trigger In the first line above, the event's timetamp is saved into the variable ts0. In the next line, ts0 is subtracted from the second @@ -1766,7 +1766,7 @@ yet another variable, 'wakeup_lat'. The hist trigger below in turn makes use of the wakeup_lat variable to compute a combined latency using the same key and variable from yet another event: - # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger + # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger 2.2.2 Synthetic Events ---------------------- @@ -1807,10 +1807,11 @@ the command that defined it with a '!': At this point, there isn't yet an actual 'wakeup_latency' event instantiated in the event subsytem - for this to happen, a 'hist trigger action' needs to be instantiated and bound to actual fields -and variables defined on other events (see Section 6.3.3 below). +and variables defined on other events (see Section 2.2.3 below on +how that is done using hist trigger 'onmatch' action). Once that is +done, the 'wakeup_latency' synthetic event instance is created. -Once that is done, an event instance is created, and a histogram can -be defined using it: +A histogram can now be defined for the new synthetic event: # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \ /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger @@ -1960,7 +1961,7 @@ hist trigger specification. back to that pid, the timestamp difference is calculated. If the resulting latency, stored in wakeup_lat, exceeds the current maximum latency, the values specified in the save() fields are - recoreded: + recorded: # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ if comm=="cyclictest"' >> \ From ed7d40bc67b8353c677b38c6cdddcdc310c0f452 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 8 Jun 2018 14:47:46 -0700 Subject: [PATCH 5/5] tracing: Fix SKIP_STACK_VALIDATION=1 build due to bad merge with -mrecord-mcount Non gcc-5 builds with CONFIG_STACK_VALIDATION=y and SKIP_STACK_VALIDATION=1 fail. Example output: /bin/sh: init/.tmp_main.o: Permission denied commit 96f60dfa5819 ("trace: Use -mcount-record for dynamic ftrace"), added a mismatched endif. This causes cmd_objtool to get mistakenly set. Relocate endif to balance the newly added -record-mcount check. Link: http://lkml.kernel.org/r/20180608214746.136554-1-gthelen@google.com Fixes: 96f60dfa5819 ("trace: Use -mcount-record for dynamic ftrace") Acked-by: Andi Kleen Tested-by: David Rientjes Signed-off-by: Greg Thelen Signed-off-by: Steven Rostedt (VMware) --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 34d9e9ce97c2..e7889f486ca1 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -239,6 +239,7 @@ cmd_record_mcount = \ "$(CC_FLAGS_FTRACE)" ]; then \ $(sub_cmd_record_mcount) \ fi; +endif # -record-mcount endif # CONFIG_FTRACE_MCOUNT_RECORD ifdef CONFIG_STACK_VALIDATION @@ -263,7 +264,6 @@ ifneq ($(RETPOLINE_CFLAGS),) objtool_args += --retpoline endif endif -endif ifdef CONFIG_MODVERSIONS