ring-buffer: Give NMIs a chance to lock the reader_lock
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0315d43176d80d264c37f22ba403a5089a124c48..e9420fdc74094722e2edc796ea86da15bbf1e92e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
 #include <linux/ring_buffer.h>
 #include <linux/trace_clock.h>
 #include <linux/trace_seq.h>
@@ -462,6 +462,7 @@ struct ring_buffer_per_cpu {
        arch_spinlock_t                 lock;
        struct lock_class_key           lock_key;
        unsigned int                    nr_pages;
+       unsigned int                    current_context;
        struct list_head                *pages;
        struct buffer_page              *head_page;     /* read from head */
        struct buffer_page              *tail_page;     /* write to tail */
@@ -2636,8 +2637,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
        return NULL;
 }
 
-#ifdef CONFIG_TRACING
-
 /*
  * The lock and unlock are done within a preempt disable section.
  * The current_context per_cpu variable can only be modified
@@ -2675,11 +2674,11 @@ rb_reserve_next_event(struct ring_buffer *buffer,
  * just so happens that it is the same bit corresponding to
  * the current context.
  */
-static DEFINE_PER_CPU(unsigned int, current_context);
 
-static __always_inline int trace_recursive_lock(void)
+static __always_inline int
+trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-       unsigned int val = __this_cpu_read(current_context);
+       unsigned int val = cpu_buffer->current_context;
        int bit;
 
        if (in_interrupt()) {
@@ -2696,23 +2695,17 @@ static __always_inline int trace_recursive_lock(void)
                return 1;
 
        val |= (1 << bit);
-       __this_cpu_write(current_context, val);
+       cpu_buffer->current_context = val;
 
        return 0;
 }
 
-static __always_inline void trace_recursive_unlock(void)
+static __always_inline void
+trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-       __this_cpu_and(current_context, __this_cpu_read(current_context) - 1);
+       cpu_buffer->current_context &= cpu_buffer->current_context - 1;
 }
 
-#else
-
-#define trace_recursive_lock()         (0)
-#define trace_recursive_unlock()       do { } while (0)
-
-#endif
-
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -2741,35 +2734,34 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
        /* If we are tracing schedule, we don't want to recurse */
        preempt_disable_notrace();
 
-       if (atomic_read(&buffer->record_disabled))
-               goto out_nocheck;
-
-       if (trace_recursive_lock())
-               goto out_nocheck;
+       if (unlikely(atomic_read(&buffer->record_disabled)))
+               goto out;
 
        cpu = raw_smp_processor_id();
 
-       if (!cpumask_test_cpu(cpu, buffer->cpumask))
+       if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
                goto out;
 
        cpu_buffer = buffer->buffers[cpu];
 
-       if (atomic_read(&cpu_buffer->record_disabled))
+       if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
                goto out;
 
-       if (length > BUF_MAX_DATA_SIZE)
+       if (unlikely(length > BUF_MAX_DATA_SIZE))
+               goto out;
+
+       if (unlikely(trace_recursive_lock(cpu_buffer)))
                goto out;
 
        event = rb_reserve_next_event(buffer, cpu_buffer, length);
        if (!event)
-               goto out;
+               goto out_unlock;
 
        return event;
 
+ out_unlock:
+       trace_recursive_unlock(cpu_buffer);
  out:
-       trace_recursive_unlock();
-
- out_nocheck:
        preempt_enable_notrace();
        return NULL;
 }
@@ -2859,7 +2851,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
        rb_wakeups(buffer, cpu_buffer);
 
-       trace_recursive_unlock();
+       trace_recursive_unlock(cpu_buffer);
 
        preempt_enable_notrace();
 
@@ -2970,7 +2962,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
  out:
        rb_end_commit(cpu_buffer);
 
-       trace_recursive_unlock();
+       trace_recursive_unlock(cpu_buffer);
 
        preempt_enable_notrace();
 
@@ -3021,9 +3013,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
        if (length > BUF_MAX_DATA_SIZE)
                goto out;
 
+       if (unlikely(trace_recursive_lock(cpu_buffer)))
+               goto out;
+
        event = rb_reserve_next_event(buffer, cpu_buffer, length);
        if (!event)
-               goto out;
+               goto out_unlock;
 
        body = rb_event_data(event);
 
@@ -3034,6 +3029,10 @@ int ring_buffer_write(struct ring_buffer *buffer,
        rb_wakeups(buffer, cpu_buffer);
 
        ret = 0;
+
+ out_unlock:
+       trace_recursive_unlock(cpu_buffer);
+
  out:
        preempt_enable_notrace();
 
@@ -3860,19 +3859,36 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
-static inline int rb_ok_to_lock(void)
+static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
+       if (likely(!in_nmi())) {
+               raw_spin_lock(&cpu_buffer->reader_lock);
+               return true;
+       }
+
        /*
         * If an NMI die dumps out the content of the ring buffer
-        * do not grab locks. We also permanently disable the ring
-        * buffer too. A one time deal is all you get from reading
-        * the ring buffer from an NMI.
+        * trylock must be used to prevent a deadlock if the NMI
+        * preempted a task that holds the ring buffer locks. If
+        * we get the lock then all is fine, if not, then continue
+        * to do the read, but this can corrupt the ring buffer,
+        * so it must be permanently disabled from future writes.
+        * Reading from NMI is a oneshot deal.
         */
-       if (likely(!in_nmi()))
-               return 1;
+       if (raw_spin_trylock(&cpu_buffer->reader_lock))
+               return true;
 
-       tracing_off_permanent();
-       return 0;
+       /* Continue without locking, but disable the ring buffer */
+       atomic_inc(&cpu_buffer->record_disabled);
+       return false;
+}
+
+static inline void
+rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
+{
+       if (likely(locked))
+               raw_spin_unlock(&cpu_buffer->reader_lock);
+       return;
 }
 
 /**
@@ -3892,21 +3908,18 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
        struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        struct ring_buffer_event *event;
        unsigned long flags;
-       int dolock;
+       bool dolock;
 
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
 
-       dolock = rb_ok_to_lock();
  again:
        local_irq_save(flags);
-       if (dolock)
-               raw_spin_lock(&cpu_buffer->reader_lock);
+       dolock = rb_reader_lock(cpu_buffer);
        event = rb_buffer_peek(cpu_buffer, ts, lost_events);
        if (event && event->type_len == RINGBUF_TYPE_PADDING)
                rb_advance_reader(cpu_buffer);
-       if (dolock)
-               raw_spin_unlock(&cpu_buffer->reader_lock);
+       rb_reader_unlock(cpu_buffer, dolock);
        local_irq_restore(flags);
 
        if (event && event->type_len == RINGBUF_TYPE_PADDING)
@@ -3959,9 +3972,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event = NULL;
        unsigned long flags;
-       int dolock;
-
-       dolock = rb_ok_to_lock();
+       bool dolock;
 
  again:
        /* might be called in atomic */
@@ -3972,8 +3983,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 
        cpu_buffer = buffer->buffers[cpu];
        local_irq_save(flags);
-       if (dolock)
-               raw_spin_lock(&cpu_buffer->reader_lock);
+       dolock = rb_reader_lock(cpu_buffer);
 
        event = rb_buffer_peek(cpu_buffer, ts, lost_events);
        if (event) {
@@ -3981,8 +3991,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
                rb_advance_reader(cpu_buffer);
        }
 
-       if (dolock)
-               raw_spin_unlock(&cpu_buffer->reader_lock);
+       rb_reader_unlock(cpu_buffer, dolock);
        local_irq_restore(flags);
 
  out:
@@ -4263,21 +4272,17 @@ int ring_buffer_empty(struct ring_buffer *buffer)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        unsigned long flags;
-       int dolock;
+       bool dolock;
        int cpu;
        int ret;
 
-       dolock = rb_ok_to_lock();
-
        /* yes this is racy, but if you don't like the race, lock the buffer */
        for_each_buffer_cpu(buffer, cpu) {
                cpu_buffer = buffer->buffers[cpu];
                local_irq_save(flags);
-               if (dolock)
-                       raw_spin_lock(&cpu_buffer->reader_lock);
+               dolock = rb_reader_lock(cpu_buffer);
                ret = rb_per_cpu_empty(cpu_buffer);
-               if (dolock)
-                       raw_spin_unlock(&cpu_buffer->reader_lock);
+               rb_reader_unlock(cpu_buffer, dolock);
                local_irq_restore(flags);
 
                if (!ret)
@@ -4297,21 +4302,17 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        unsigned long flags;
-       int dolock;
+       bool dolock;
        int ret;
 
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 1;
 
-       dolock = rb_ok_to_lock();
-
        cpu_buffer = buffer->buffers[cpu];
        local_irq_save(flags);
-       if (dolock)
-               raw_spin_lock(&cpu_buffer->reader_lock);
+       dolock = rb_reader_lock(cpu_buffer);
        ret = rb_per_cpu_empty(cpu_buffer);
-       if (dolock)
-               raw_spin_unlock(&cpu_buffer->reader_lock);
+       rb_reader_unlock(cpu_buffer, dolock);
        local_irq_restore(flags);
 
        return ret;
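
Illustrative note, not part of the commit: the rb_reader_lock()/rb_reader_unlock() pattern above (take reader_lock normally, but only trylock from NMI and fall back to an unlocked read with the buffer disabled) can be modelled in user space. The sketch below is a minimal analogue under stated assumptions: a pthread mutex stands in for raw_spinlock_t, and fake_cpu_buffer, fake_in_nmi, reader_lock() and reader_unlock() are names invented for the example, not kernel symbols.

/*
 * User-space analogue of rb_reader_lock()/rb_reader_unlock(): in NMI-like
 * context the lock may only be try-acquired; on failure the read proceeds
 * unlocked and the buffer is marked disabled so writers cannot corrupt it.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_cpu_buffer {
	pthread_mutex_t reader_lock;
	atomic_int record_disabled;
};

/* Pretend we are in NMI context when this flag is set. */
static bool fake_in_nmi;

static bool reader_lock(struct fake_cpu_buffer *cpu_buffer)
{
	if (!fake_in_nmi) {
		pthread_mutex_lock(&cpu_buffer->reader_lock);
		return true;
	}

	/* NMI path: never block, or we could deadlock on our own lock. */
	if (pthread_mutex_trylock(&cpu_buffer->reader_lock) == 0)
		return true;

	/* Continue unlocked, but stop writers for good. */
	atomic_fetch_add(&cpu_buffer->record_disabled, 1);
	return false;
}

static void reader_unlock(struct fake_cpu_buffer *cpu_buffer, bool locked)
{
	if (locked)
		pthread_mutex_unlock(&cpu_buffer->reader_lock);
}

int main(void)
{
	struct fake_cpu_buffer buf = {
		.reader_lock = PTHREAD_MUTEX_INITIALIZER,
		.record_disabled = 0,
	};
	bool locked;

	/* Normal context: the lock is taken unconditionally. */
	locked = reader_lock(&buf);
	printf("normal read, locked=%d\n", locked);
	reader_unlock(&buf, locked);

	/* "NMI" arrives while the lock is already held elsewhere. */
	pthread_mutex_lock(&buf.reader_lock);
	fake_in_nmi = true;
	locked = reader_lock(&buf);
	printf("nmi read, locked=%d, record_disabled=%d\n",
	       locked, atomic_load(&buf.record_disabled));
	reader_unlock(&buf, locked);
	pthread_mutex_unlock(&buf.reader_lock);

	return 0;
}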
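
A second note on the recursion guard that this patch moves from a per-CPU variable into struct ring_buffer_per_cpu: each nesting context (NMI, IRQ, SoftIRQ, normal task) owns one bit of current_context, trace_recursive_lock() fails if that context's bit is already set, and trace_recursive_unlock() clears the lowest set bit via val & (val - 1), which is always the bit of the innermost active context. The stand-alone sketch below is hedged: the explicit ctx argument replaces the kernel's in_nmi()/in_irq()/in_interrupt() checks, and the exact enum values are illustrative; what matters is that more deeply nesting contexts use lower-numbered bits.

/*
 * Stand-alone sketch of the trace_recursive_lock()/unlock() bit trick.
 * Context is passed in explicitly instead of being derived from the
 * kernel's preempt-count helpers; everything else mirrors the patch.
 */
#include <assert.h>
#include <stdio.h>

enum ctx { CTX_NMI = 0, CTX_IRQ = 1, CTX_SOFTIRQ = 2, CTX_NORMAL = 3 };

struct fake_cpu_buffer {
	unsigned int current_context;
};

/* Returns 0 on success, 1 if this context is already inside the buffer. */
static int recursive_lock(struct fake_cpu_buffer *cpu_buffer, enum ctx bit)
{
	unsigned int val = cpu_buffer->current_context;

	if (val & (1u << bit))
		return 1;	/* recursion within the same context */

	cpu_buffer->current_context = val | (1u << bit);
	return 0;
}

static void recursive_unlock(struct fake_cpu_buffer *cpu_buffer)
{
	/* Clear the lowest set bit: the innermost (current) context. */
	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
}

int main(void)
{
	struct fake_cpu_buffer buf = { 0 };

	assert(recursive_lock(&buf, CTX_NORMAL) == 0);	/* task context */
	assert(recursive_lock(&buf, CTX_IRQ) == 0);	/* irq preempts it */
	assert(recursive_lock(&buf, CTX_IRQ) == 1);	/* recursion caught */

	recursive_unlock(&buf);		/* drops the IRQ bit (bit 1) */
	recursive_unlock(&buf);		/* drops the normal bit (bit 3) */
	assert(buf.current_context == 0);

	printf("recursion guard behaves as expected\n");
	return 0;
}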