[linux-2.6.9-moxart.git] drivers/oprofile/cpu_buffer.c

/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */

#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>

#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"
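
/* One sample buffer per CPU; written from interrupt context by
 * oprofile_add_sample() and drained by wq_sync_buffer(). */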
struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;

static void wq_sync_buffer(void *);
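
/* How often (in jiffies) each CPU's buffer is flushed into the global
 * event buffer; clearing work_enabled stops the work rescheduling itself. */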
#define DEFAULT_TIMER_EXPIRE (HZ / 10)
int work_enabled;
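
/* Free the buffers allocated so far; walks all online CPUs, so the num
 * argument is not used here. */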
static void __free_cpu_buffers(int num)
{
        int i;

        for_each_online_cpu(i) {
                if (cpu_buffer[i].buffer)
                        vfree(cpu_buffer[i].buffer);
        }
}
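
/* Allocate a sample buffer for each online CPU and initialise its
 * bookkeeping; on any allocation failure, free what was already
 * allocated and return -ENOMEM. */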
int alloc_cpu_buffers(void)
{
        int i;

        unsigned long buffer_size = fs_cpu_buffer_size;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer * b = &cpu_buffer[i];

                b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size);
                if (!b->buffer)
                        goto fail;

                b->last_task = NULL;
                b->last_is_kernel = -1;
                b->buffer_size = buffer_size;
                b->tail_pos = 0;
                b->head_pos = 0;
                b->sample_received = 0;
                b->sample_lost_overflow = 0;
                b->cpu = i;
                INIT_WORK(&b->work, wq_sync_buffer, b);
        }
        return 0;

fail:
        __free_cpu_buffers(i);
        return -ENOMEM;
}

void free_cpu_buffers(void)
{
        __free_cpu_buffers(NR_CPUS);
}
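
/* Start the periodic sync work on every online CPU. */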
void start_cpu_work(void)
{
        int i;

        work_enabled = 1;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer * b = &cpu_buffer[i];

                /*
                 * Spread the work by 1 jiffy per cpu so they don't all
                 * fire at once.
                 */
                schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
        }
}
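
/* Stop the periodic sync: clear work_enabled so running work does not
 * reschedule itself, cancel anything pending and wait for in-flight
 * work to finish. */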
void end_cpu_work(void)
{
        int i;

        work_enabled = 0;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer * b = &cpu_buffer[i];
                cancel_delayed_work(&b->work);
        }

        flush_scheduled_work();
}

/* compute number of available slots in cpu_buffer queue */
static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b)
{
        unsigned long head = b->head_pos;
        unsigned long tail = b->tail_pos;
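
        /* one slot is always kept empty so that head == tail means the
         * buffer is empty rather than full */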
        if (tail > head)
                return (tail - head) - 1;

        return tail + (b->buffer_size - head) - 1;
}
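
/* Advance the write position by one slot, wrapping around at the end
 * of the ring buffer. */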
static void increment_head(struct oprofile_cpu_buffer * b)
{
        unsigned long new_head = b->head_pos + 1;

        /* Ensure anything written to the slot before we
         * increment is visible */
        wmb();

        if (new_head < (b->buffer_size))
                b->head_pos = new_head;
        else
                b->head_pos = 0;
}

/* This must be safe from any context. It's safe writing here
 * because of the head/tail separation of the writer and reader
 * of the CPU buffer.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * eip. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes
 */
void oprofile_add_sample(unsigned long eip, unsigned int is_kernel,
                         unsigned long event, int cpu)
{
        struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu];
        struct task_struct * task;

        is_kernel = !!is_kernel;

        cpu_buf->sample_received++;
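
        /* a single call may write up to three records below (kernel/user
         * switch, task switch, and the sample itself), so require three
         * free slots */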
        if (nr_available_slots(cpu_buf) < 3) {
                cpu_buf->sample_lost_overflow++;
                return;
        }

        task = current;

        /* notice a switch from user->kernel or vice versa */
        if (cpu_buf->last_is_kernel != is_kernel) {
                cpu_buf->last_is_kernel = is_kernel;
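                /* an eip of ~0UL marks this slot as a context-switch record
                 * rather than a PC sample; the event field then carries the
                 * new is_kernel value (or, below, the new task pointer) */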
                cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL;
                cpu_buf->buffer[cpu_buf->head_pos].event = is_kernel;
                increment_head(cpu_buf);
        }

        /* notice a task switch */
        if (cpu_buf->last_task != task) {
                cpu_buf->last_task = task;
                cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL;
                cpu_buf->buffer[cpu_buf->head_pos].event = (unsigned long)task;
                increment_head(cpu_buf);
        }

        cpu_buf->buffer[cpu_buf->head_pos].eip = eip;
        cpu_buf->buffer[cpu_buf->head_pos].event = event;
        increment_head(cpu_buf);
}

/* Resets the cpu buffer to a sane state. */
void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf)
{
        /* reset these to invalid values; the next sample
         * collected will populate the buffer with proper
         * values to initialize the buffer
         */
        cpu_buf->last_is_kernel = -1;
        cpu_buf->last_task = NULL;
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu
 */
static void wq_sync_buffer(void * data)
{
        struct oprofile_cpu_buffer * b = (struct oprofile_cpu_buffer *)data;
        if (b->cpu != smp_processor_id()) {
                printk("WQ on CPU%d, prefer CPU%d\n",
                       smp_processor_id(), b->cpu);
        }
        sync_buffer(b->cpu);

        /* don't re-add the work if we're shutting down */
        if (work_enabled)
                schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}