1bcea3f96SSteven Rostedt (VMware) // SPDX-License-Identifier: GPL-2.0 27a8e76a3SSteven Rostedt /* 37a8e76a3SSteven Rostedt * Generic ring buffer 47a8e76a3SSteven Rostedt * 57a8e76a3SSteven Rostedt * Copyright (C) 2008 Steven Rostedt <[email protected]> 67a8e76a3SSteven Rostedt */ 7af658dcaSSteven Rostedt (Red Hat) #include <linux/trace_events.h> 87a8e76a3SSteven Rostedt #include <linux/ring_buffer.h> 914131f2fSIngo Molnar #include <linux/trace_clock.h> 10e6017571SIngo Molnar #include <linux/sched/clock.h> 110b07436dSSteven Rostedt #include <linux/trace_seq.h> 127a8e76a3SSteven Rostedt #include <linux/spinlock.h> 1315693458SSteven Rostedt (Red Hat) #include <linux/irq_work.h> 14a356646aSSteven Rostedt (VMware) #include <linux/security.h> 157a8e76a3SSteven Rostedt #include <linux/uaccess.h> 16a81bd80aSSteven Rostedt #include <linux/hardirq.h> 176c43e554SSteven Rostedt (Red Hat) #include <linux/kthread.h> /* for self test */ 187a8e76a3SSteven Rostedt #include <linux/module.h> 197a8e76a3SSteven Rostedt #include <linux/percpu.h> 207a8e76a3SSteven Rostedt #include <linux/mutex.h> 216c43e554SSteven Rostedt (Red Hat) #include <linux/delay.h> 225a0e3ad6STejun Heo #include <linux/slab.h> 237a8e76a3SSteven Rostedt #include <linux/init.h> 247a8e76a3SSteven Rostedt #include <linux/hash.h> 257a8e76a3SSteven Rostedt #include <linux/list.h> 26554f786eSSteven Rostedt #include <linux/cpu.h> 27927e56dbSSteven Rostedt (VMware) #include <linux/oom.h> 287a8e76a3SSteven Rostedt 2979615760SChristoph Lameter #include <asm/local.h> 30182e9f5fSSteven Rostedt 3183f40318SVaibhav Nagarnaik static void update_pages_handler(struct work_struct *work); 3283f40318SVaibhav Nagarnaik 33033601a3SSteven Rostedt /* 34d1b182a8SSteven Rostedt * The ring buffer header is special. We must manually up keep it. 35d1b182a8SSteven Rostedt */ 36d1b182a8SSteven Rostedt int ring_buffer_print_entry_header(struct trace_seq *s) 37d1b182a8SSteven Rostedt { 38c0cd93aaSSteven Rostedt (Red Hat) trace_seq_puts(s, "# compressed entry header\n"); 39c0cd93aaSSteven Rostedt (Red Hat) trace_seq_puts(s, "\ttype_len : 5 bits\n"); 40c0cd93aaSSteven Rostedt (Red Hat) trace_seq_puts(s, "\ttime_delta : 27 bits\n"); 41c0cd93aaSSteven Rostedt (Red Hat) trace_seq_puts(s, "\tarray : 32 bits\n"); 42c0cd93aaSSteven Rostedt (Red Hat) trace_seq_putc(s, '\n'); 43c0cd93aaSSteven Rostedt (Red Hat) trace_seq_printf(s, "\tpadding : type == %d\n", 44d1b182a8SSteven Rostedt RINGBUF_TYPE_PADDING); 45c0cd93aaSSteven Rostedt (Red Hat) trace_seq_printf(s, "\ttime_extend : type == %d\n", 46d1b182a8SSteven Rostedt RINGBUF_TYPE_TIME_EXTEND); 47dc4e2801STom Zanussi trace_seq_printf(s, "\ttime_stamp : type == %d\n", 48dc4e2801STom Zanussi RINGBUF_TYPE_TIME_STAMP); 49c0cd93aaSSteven Rostedt (Red Hat) trace_seq_printf(s, "\tdata max type_len == %d\n", 50334d4169SLai Jiangshan RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 51d1b182a8SSteven Rostedt 52c0cd93aaSSteven Rostedt (Red Hat) return !trace_seq_has_overflowed(s); 53d1b182a8SSteven Rostedt } 54d1b182a8SSteven Rostedt 55d1b182a8SSteven Rostedt /* 565cc98548SSteven Rostedt * The ring buffer is made up of a list of pages. A separate list of pages is 575cc98548SSteven Rostedt * allocated for each CPU. A writer may only write to a buffer that is 585cc98548SSteven Rostedt * associated with the CPU it is currently executing on. A reader may read 595cc98548SSteven Rostedt * from any per cpu buffer. 605cc98548SSteven Rostedt * 615cc98548SSteven Rostedt * The reader is special. For each per cpu buffer, the reader has its own 625cc98548SSteven Rostedt * reader page. When a reader has read the entire reader page, this reader 635cc98548SSteven Rostedt * page is swapped with another page in the ring buffer. 645cc98548SSteven Rostedt * 655cc98548SSteven Rostedt * Now, as long as the writer is off the reader page, the reader can do what 665cc98548SSteven Rostedt * ever it wants with that page. The writer will never write to that page 675cc98548SSteven Rostedt * again (as long as it is out of the ring buffer). 685cc98548SSteven Rostedt * 695cc98548SSteven Rostedt * Here's some silly ASCII art. 705cc98548SSteven Rostedt * 715cc98548SSteven Rostedt * +------+ 725cc98548SSteven Rostedt * |reader| RING BUFFER 735cc98548SSteven Rostedt * |page | 745cc98548SSteven Rostedt * +------+ +---+ +---+ +---+ 755cc98548SSteven Rostedt * | |-->| |-->| | 765cc98548SSteven Rostedt * +---+ +---+ +---+ 775cc98548SSteven Rostedt * ^ | 785cc98548SSteven Rostedt * | | 795cc98548SSteven Rostedt * +---------------+ 805cc98548SSteven Rostedt * 815cc98548SSteven Rostedt * 825cc98548SSteven Rostedt * +------+ 835cc98548SSteven Rostedt * |reader| RING BUFFER 845cc98548SSteven Rostedt * |page |------------------v 855cc98548SSteven Rostedt * +------+ +---+ +---+ +---+ 865cc98548SSteven Rostedt * | |-->| |-->| | 875cc98548SSteven Rostedt * +---+ +---+ +---+ 885cc98548SSteven Rostedt * ^ | 895cc98548SSteven Rostedt * | | 905cc98548SSteven Rostedt * +---------------+ 915cc98548SSteven Rostedt * 925cc98548SSteven Rostedt * 935cc98548SSteven Rostedt * +------+ 945cc98548SSteven Rostedt * |reader| RING BUFFER 955cc98548SSteven Rostedt * |page |------------------v 965cc98548SSteven Rostedt * +------+ +---+ +---+ +---+ 975cc98548SSteven Rostedt * ^ | |-->| |-->| | 985cc98548SSteven Rostedt * | +---+ +---+ +---+ 995cc98548SSteven Rostedt * | | 1005cc98548SSteven Rostedt * | | 1015cc98548SSteven Rostedt * +------------------------------+ 1025cc98548SSteven Rostedt * 1035cc98548SSteven Rostedt * 1045cc98548SSteven Rostedt * +------+ 1055cc98548SSteven Rostedt * |buffer| RING BUFFER 1065cc98548SSteven Rostedt * |page |------------------v 1075cc98548SSteven Rostedt * +------+ +---+ +---+ +---+ 1085cc98548SSteven Rostedt * ^ | | | |-->| | 1095cc98548SSteven Rostedt * | New +---+ +---+ +---+ 1105cc98548SSteven Rostedt * | Reader------^ | 1115cc98548SSteven Rostedt * | page | 1125cc98548SSteven Rostedt * +------------------------------+ 1135cc98548SSteven Rostedt * 1145cc98548SSteven Rostedt * 1155cc98548SSteven Rostedt * After we make this swap, the reader can hand this page off to the splice 1165cc98548SSteven Rostedt * code and be done with it. It can even allocate a new page if it needs to 1175cc98548SSteven Rostedt * and swap that into the ring buffer. 1185cc98548SSteven Rostedt * 1195cc98548SSteven Rostedt * We will be using cmpxchg soon to make all this lockless. 1205cc98548SSteven Rostedt * 1215cc98548SSteven Rostedt */ 1225cc98548SSteven Rostedt 123499e5470SSteven Rostedt /* Used for individual buffers (after the counter) */ 124499e5470SSteven Rostedt #define RB_BUFFER_OFF (1 << 20) 125499e5470SSteven Rostedt 126474d32b6SSteven Rostedt #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) 127474d32b6SSteven Rostedt 128e3d6bf0aSSteven Rostedt #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 12967d34724SAndrew Morton #define RB_ALIGNMENT 4U 130334d4169SLai Jiangshan #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 131c7b09308SSteven Rostedt #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 13286b3de60SSteven Rostedt (VMware) #define RB_ALIGN_DATA __aligned(RB_ALIGNMENT) 133649508f6SJames Hogan 134334d4169SLai Jiangshan /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 135334d4169SLai Jiangshan #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 1367a8e76a3SSteven Rostedt 1377a8e76a3SSteven Rostedt enum { 1387a8e76a3SSteven Rostedt RB_LEN_TIME_EXTEND = 8, 139dc4e2801STom Zanussi RB_LEN_TIME_STAMP = 8, 1407a8e76a3SSteven Rostedt }; 1417a8e76a3SSteven Rostedt 14269d1b839SSteven Rostedt #define skip_time_extend(event) \ 14369d1b839SSteven Rostedt ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) 14469d1b839SSteven Rostedt 145dc4e2801STom Zanussi #define extended_time(event) \ 146dc4e2801STom Zanussi (event->type_len >= RINGBUF_TYPE_TIME_EXTEND) 147dc4e2801STom Zanussi 1482d622719STom Zanussi static inline int rb_null_event(struct ring_buffer_event *event) 1492d622719STom Zanussi { 150a1863c21SSteven Rostedt return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 1512d622719STom Zanussi } 1522d622719STom Zanussi 1532d622719STom Zanussi static void rb_event_set_padding(struct ring_buffer_event *event) 1542d622719STom Zanussi { 155a1863c21SSteven Rostedt /* padding has a NULL time_delta */ 156334d4169SLai Jiangshan event->type_len = RINGBUF_TYPE_PADDING; 1572d622719STom Zanussi event->time_delta = 0; 1582d622719STom Zanussi } 1592d622719STom Zanussi 1602d622719STom Zanussi static unsigned 1612d622719STom Zanussi rb_event_data_length(struct ring_buffer_event *event) 1622d622719STom Zanussi { 1632d622719STom Zanussi unsigned length; 1642d622719STom Zanussi 165334d4169SLai Jiangshan if (event->type_len) 166334d4169SLai Jiangshan length = event->type_len * RB_ALIGNMENT; 1672d622719STom Zanussi else 1682d622719STom Zanussi length = event->array[0]; 1692d622719STom Zanussi return length + RB_EVNT_HDR_SIZE; 1702d622719STom Zanussi } 1712d622719STom Zanussi 17269d1b839SSteven Rostedt /* 17369d1b839SSteven Rostedt * Return the length of the given event. Will return 17469d1b839SSteven Rostedt * the length of the time extend if the event is a 17569d1b839SSteven Rostedt * time extend. 17669d1b839SSteven Rostedt */ 17769d1b839SSteven Rostedt static inline unsigned 1787a8e76a3SSteven Rostedt rb_event_length(struct ring_buffer_event *event) 1797a8e76a3SSteven Rostedt { 180334d4169SLai Jiangshan switch (event->type_len) { 1817a8e76a3SSteven Rostedt case RINGBUF_TYPE_PADDING: 1822d622719STom Zanussi if (rb_null_event(event)) 1837a8e76a3SSteven Rostedt /* undefined */ 1847a8e76a3SSteven Rostedt return -1; 185334d4169SLai Jiangshan return event->array[0] + RB_EVNT_HDR_SIZE; 1867a8e76a3SSteven Rostedt 1877a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_EXTEND: 1887a8e76a3SSteven Rostedt return RB_LEN_TIME_EXTEND; 1897a8e76a3SSteven Rostedt 1907a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_STAMP: 1917a8e76a3SSteven Rostedt return RB_LEN_TIME_STAMP; 1927a8e76a3SSteven Rostedt 1937a8e76a3SSteven Rostedt case RINGBUF_TYPE_DATA: 1942d622719STom Zanussi return rb_event_data_length(event); 1957a8e76a3SSteven Rostedt default: 196da4d401aSSteven Rostedt (VMware) WARN_ON_ONCE(1); 1977a8e76a3SSteven Rostedt } 1987a8e76a3SSteven Rostedt /* not hit */ 1997a8e76a3SSteven Rostedt return 0; 2007a8e76a3SSteven Rostedt } 2017a8e76a3SSteven Rostedt 20269d1b839SSteven Rostedt /* 20369d1b839SSteven Rostedt * Return total length of time extend and data, 20469d1b839SSteven Rostedt * or just the event length for all other events. 20569d1b839SSteven Rostedt */ 20669d1b839SSteven Rostedt static inline unsigned 20769d1b839SSteven Rostedt rb_event_ts_length(struct ring_buffer_event *event) 20869d1b839SSteven Rostedt { 20969d1b839SSteven Rostedt unsigned len = 0; 21069d1b839SSteven Rostedt 211dc4e2801STom Zanussi if (extended_time(event)) { 21269d1b839SSteven Rostedt /* time extends include the data event after it */ 21369d1b839SSteven Rostedt len = RB_LEN_TIME_EXTEND; 21469d1b839SSteven Rostedt event = skip_time_extend(event); 21569d1b839SSteven Rostedt } 21669d1b839SSteven Rostedt return len + rb_event_length(event); 21769d1b839SSteven Rostedt } 21869d1b839SSteven Rostedt 2197a8e76a3SSteven Rostedt /** 2207a8e76a3SSteven Rostedt * ring_buffer_event_length - return the length of the event 2217a8e76a3SSteven Rostedt * @event: the event to get the length of 22269d1b839SSteven Rostedt * 22369d1b839SSteven Rostedt * Returns the size of the data load of a data event. 22469d1b839SSteven Rostedt * If the event is something other than a data event, it 22569d1b839SSteven Rostedt * returns the size of the event itself. With the exception 22669d1b839SSteven Rostedt * of a TIME EXTEND, where it still returns the size of the 22769d1b839SSteven Rostedt * data load of the data event after it. 2287a8e76a3SSteven Rostedt */ 2297a8e76a3SSteven Rostedt unsigned ring_buffer_event_length(struct ring_buffer_event *event) 2307a8e76a3SSteven Rostedt { 23169d1b839SSteven Rostedt unsigned length; 23269d1b839SSteven Rostedt 233dc4e2801STom Zanussi if (extended_time(event)) 23469d1b839SSteven Rostedt event = skip_time_extend(event); 23569d1b839SSteven Rostedt 23669d1b839SSteven Rostedt length = rb_event_length(event); 237334d4169SLai Jiangshan if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 238465634adSRobert Richter return length; 239465634adSRobert Richter length -= RB_EVNT_HDR_SIZE; 240465634adSRobert Richter if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) 241465634adSRobert Richter length -= sizeof(event->array[0]); 242465634adSRobert Richter return length; 2437a8e76a3SSteven Rostedt } 244c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_event_length); 2457a8e76a3SSteven Rostedt 2467a8e76a3SSteven Rostedt /* inline for ring buffer fast paths */ 247929ddbf3SSteven Rostedt (Red Hat) static __always_inline void * 2487a8e76a3SSteven Rostedt rb_event_data(struct ring_buffer_event *event) 2497a8e76a3SSteven Rostedt { 250dc4e2801STom Zanussi if (extended_time(event)) 25169d1b839SSteven Rostedt event = skip_time_extend(event); 252da4d401aSSteven Rostedt (VMware) WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 2537a8e76a3SSteven Rostedt /* If length is in len field, then array[0] has the data */ 254334d4169SLai Jiangshan if (event->type_len) 2557a8e76a3SSteven Rostedt return (void *)&event->array[0]; 2567a8e76a3SSteven Rostedt /* Otherwise length is in array[0] and array[1] has the data */ 2577a8e76a3SSteven Rostedt return (void *)&event->array[1]; 2587a8e76a3SSteven Rostedt } 2597a8e76a3SSteven Rostedt 2607a8e76a3SSteven Rostedt /** 2617a8e76a3SSteven Rostedt * ring_buffer_event_data - return the data of the event 2627a8e76a3SSteven Rostedt * @event: the event to get the data from 2637a8e76a3SSteven Rostedt */ 2647a8e76a3SSteven Rostedt void *ring_buffer_event_data(struct ring_buffer_event *event) 2657a8e76a3SSteven Rostedt { 2667a8e76a3SSteven Rostedt return rb_event_data(event); 2677a8e76a3SSteven Rostedt } 268c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_event_data); 2697a8e76a3SSteven Rostedt 2707a8e76a3SSteven Rostedt #define for_each_buffer_cpu(buffer, cpu) \ 2719e01c1b7SRusty Russell for_each_cpu(cpu, buffer->cpumask) 2727a8e76a3SSteven Rostedt 273b23d7a5fSNicholas Piggin #define for_each_online_buffer_cpu(buffer, cpu) \ 274b23d7a5fSNicholas Piggin for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask) 275b23d7a5fSNicholas Piggin 2767a8e76a3SSteven Rostedt #define TS_SHIFT 27 2777a8e76a3SSteven Rostedt #define TS_MASK ((1ULL << TS_SHIFT) - 1) 2787a8e76a3SSteven Rostedt #define TS_DELTA_TEST (~TS_MASK) 2797a8e76a3SSteven Rostedt 280dc4e2801STom Zanussi /** 281dc4e2801STom Zanussi * ring_buffer_event_time_stamp - return the event's extended timestamp 282dc4e2801STom Zanussi * @event: the event to get the timestamp of 283dc4e2801STom Zanussi * 284dc4e2801STom Zanussi * Returns the extended timestamp associated with a data event. 285dc4e2801STom Zanussi * An extended time_stamp is a 64-bit timestamp represented 286dc4e2801STom Zanussi * internally in a special way that makes the best use of space 287dc4e2801STom Zanussi * contained within a ring buffer event. This function decodes 288dc4e2801STom Zanussi * it and maps it to a straight u64 value. 289dc4e2801STom Zanussi */ 290dc4e2801STom Zanussi u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) 291dc4e2801STom Zanussi { 292dc4e2801STom Zanussi u64 ts; 293dc4e2801STom Zanussi 294dc4e2801STom Zanussi ts = event->array[0]; 295dc4e2801STom Zanussi ts <<= TS_SHIFT; 296dc4e2801STom Zanussi ts += event->time_delta; 297dc4e2801STom Zanussi 298dc4e2801STom Zanussi return ts; 299dc4e2801STom Zanussi } 300dc4e2801STom Zanussi 30166a8cb95SSteven Rostedt /* Flag when events were overwritten */ 30266a8cb95SSteven Rostedt #define RB_MISSED_EVENTS (1 << 31) 303ff0ff84aSSteven Rostedt /* Missed count stored at end */ 304ff0ff84aSSteven Rostedt #define RB_MISSED_STORED (1 << 30) 30566a8cb95SSteven Rostedt 306abc9b56dSSteven Rostedt struct buffer_data_page { 3077a8e76a3SSteven Rostedt u64 time_stamp; /* page time stamp */ 308c3706f00SWenji Huang local_t commit; /* write committed index */ 309649508f6SJames Hogan unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ 310abc9b56dSSteven Rostedt }; 311abc9b56dSSteven Rostedt 31277ae365eSSteven Rostedt /* 31377ae365eSSteven Rostedt * Note, the buffer_page list must be first. The buffer pages 31477ae365eSSteven Rostedt * are allocated in cache lines, which means that each buffer 31577ae365eSSteven Rostedt * page will be at the beginning of a cache line, and thus 31677ae365eSSteven Rostedt * the least significant bits will be zero. We use this to 31777ae365eSSteven Rostedt * add flags in the list struct pointers, to make the ring buffer 31877ae365eSSteven Rostedt * lockless. 31977ae365eSSteven Rostedt */ 320abc9b56dSSteven Rostedt struct buffer_page { 321778c55d4SSteven Rostedt struct list_head list; /* list of buffer pages */ 322abc9b56dSSteven Rostedt local_t write; /* index for next write */ 3236f807acdSSteven Rostedt unsigned read; /* index for next read */ 324778c55d4SSteven Rostedt local_t entries; /* entries on this page */ 325ff0ff84aSSteven Rostedt unsigned long real_end; /* real end of data */ 326abc9b56dSSteven Rostedt struct buffer_data_page *page; /* Actual data page */ 3277a8e76a3SSteven Rostedt }; 3287a8e76a3SSteven Rostedt 32977ae365eSSteven Rostedt /* 33077ae365eSSteven Rostedt * The buffer page counters, write and entries, must be reset 33177ae365eSSteven Rostedt * atomically when crossing page boundaries. To synchronize this 33277ae365eSSteven Rostedt * update, two counters are inserted into the number. One is 33377ae365eSSteven Rostedt * the actual counter for the write position or count on the page. 33477ae365eSSteven Rostedt * 33577ae365eSSteven Rostedt * The other is a counter of updaters. Before an update happens 33677ae365eSSteven Rostedt * the update partition of the counter is incremented. This will 33777ae365eSSteven Rostedt * allow the updater to update the counter atomically. 33877ae365eSSteven Rostedt * 33977ae365eSSteven Rostedt * The counter is 20 bits, and the state data is 12. 34077ae365eSSteven Rostedt */ 34177ae365eSSteven Rostedt #define RB_WRITE_MASK 0xfffff 34277ae365eSSteven Rostedt #define RB_WRITE_INTCNT (1 << 20) 34377ae365eSSteven Rostedt 344044fa782SSteven Rostedt static void rb_init_page(struct buffer_data_page *bpage) 345abc9b56dSSteven Rostedt { 346044fa782SSteven Rostedt local_set(&bpage->commit, 0); 347abc9b56dSSteven Rostedt } 348abc9b56dSSteven Rostedt 3497a8e76a3SSteven Rostedt /* 350ed56829cSSteven Rostedt * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 351ed56829cSSteven Rostedt * this issue out. 352ed56829cSSteven Rostedt */ 35334a148bfSAndrew Morton static void free_buffer_page(struct buffer_page *bpage) 354ed56829cSSteven Rostedt { 3556ae2a076SSteven Rostedt free_page((unsigned long)bpage->page); 356e4c2ce82SSteven Rostedt kfree(bpage); 357ed56829cSSteven Rostedt } 358ed56829cSSteven Rostedt 359ed56829cSSteven Rostedt /* 3607a8e76a3SSteven Rostedt * We need to fit the time_stamp delta into 27 bits. 3617a8e76a3SSteven Rostedt */ 3627a8e76a3SSteven Rostedt static inline int test_time_stamp(u64 delta) 3637a8e76a3SSteven Rostedt { 3647a8e76a3SSteven Rostedt if (delta & TS_DELTA_TEST) 3657a8e76a3SSteven Rostedt return 1; 3667a8e76a3SSteven Rostedt return 0; 3677a8e76a3SSteven Rostedt } 3687a8e76a3SSteven Rostedt 369474d32b6SSteven Rostedt #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) 3707a8e76a3SSteven Rostedt 371be957c44SSteven Rostedt /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 372be957c44SSteven Rostedt #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 373be957c44SSteven Rostedt 374d1b182a8SSteven Rostedt int ring_buffer_print_page_header(struct trace_seq *s) 375d1b182a8SSteven Rostedt { 376d1b182a8SSteven Rostedt struct buffer_data_page field; 377d1b182a8SSteven Rostedt 378c0cd93aaSSteven Rostedt (Red Hat) trace_seq_printf(s, "\tfield: u64 timestamp;\t" 37926a50744STom Zanussi "offset:0;\tsize:%u;\tsigned:%u;\n", 38026a50744STom Zanussi (unsigned int)sizeof(field.time_stamp), 38126a50744STom Zanussi (unsigned int)is_signed_type(u64)); 382d1b182a8SSteven Rostedt 383c0cd93aaSSteven Rostedt (Red Hat) trace_seq_printf(s, "\tfield: local_t commit;\t" 38426a50744STom Zanussi "offset:%u;\tsize:%u;\tsigned:%u;\n", 385d1b182a8SSteven Rostedt (unsigned int)offsetof(typeof(field), commit), 38626a50744STom Zanussi (unsigned int)sizeof(field.commit), 38726a50744STom Zanussi (unsigned int)is_signed_type(long)); 388d1b182a8SSteven Rostedt 389c0cd93aaSSteven Rostedt (Red Hat) trace_seq_printf(s, "\tfield: int overwrite;\t" 39066a8cb95SSteven Rostedt "offset:%u;\tsize:%u;\tsigned:%u;\n", 39166a8cb95SSteven Rostedt (unsigned int)offsetof(typeof(field), commit), 39266a8cb95SSteven Rostedt 1, 39366a8cb95SSteven Rostedt (unsigned int)is_signed_type(long)); 39466a8cb95SSteven Rostedt 395c0cd93aaSSteven Rostedt (Red Hat) trace_seq_printf(s, "\tfield: char data;\t" 39626a50744STom Zanussi "offset:%u;\tsize:%u;\tsigned:%u;\n", 397d1b182a8SSteven Rostedt (unsigned int)offsetof(typeof(field), data), 39826a50744STom Zanussi (unsigned int)BUF_PAGE_SIZE, 39926a50744STom Zanussi (unsigned int)is_signed_type(char)); 400d1b182a8SSteven Rostedt 401c0cd93aaSSteven Rostedt (Red Hat) return !trace_seq_has_overflowed(s); 402d1b182a8SSteven Rostedt } 403d1b182a8SSteven Rostedt 40415693458SSteven Rostedt (Red Hat) struct rb_irq_work { 40515693458SSteven Rostedt (Red Hat) struct irq_work work; 40615693458SSteven Rostedt (Red Hat) wait_queue_head_t waiters; 4071e0d6714SSteven Rostedt (Red Hat) wait_queue_head_t full_waiters; 40815693458SSteven Rostedt (Red Hat) bool waiters_pending; 4091e0d6714SSteven Rostedt (Red Hat) bool full_waiters_pending; 4101e0d6714SSteven Rostedt (Red Hat) bool wakeup_full; 41115693458SSteven Rostedt (Red Hat) }; 41215693458SSteven Rostedt (Red Hat) 4137a8e76a3SSteven Rostedt /* 414fcc742eaSSteven Rostedt (Red Hat) * Structure to hold event state and handle nested events. 415fcc742eaSSteven Rostedt (Red Hat) */ 416fcc742eaSSteven Rostedt (Red Hat) struct rb_event_info { 417fcc742eaSSteven Rostedt (Red Hat) u64 ts; 418fcc742eaSSteven Rostedt (Red Hat) u64 delta; 41958fbc3c6SSteven Rostedt (VMware) u64 before; 42058fbc3c6SSteven Rostedt (VMware) u64 after; 421fcc742eaSSteven Rostedt (Red Hat) unsigned long length; 422fcc742eaSSteven Rostedt (Red Hat) struct buffer_page *tail_page; 423fcc742eaSSteven Rostedt (Red Hat) int add_timestamp; 424fcc742eaSSteven Rostedt (Red Hat) }; 425fcc742eaSSteven Rostedt (Red Hat) 426fcc742eaSSteven Rostedt (Red Hat) /* 427a389d86fSSteven Rostedt (VMware) * Used for the add_timestamp 428a389d86fSSteven Rostedt (VMware) * NONE 4297c4b4a51SSteven Rostedt (VMware) * EXTEND - wants a time extend 4307c4b4a51SSteven Rostedt (VMware) * ABSOLUTE - the buffer requests all events to have absolute time stamps 431a389d86fSSteven Rostedt (VMware) * FORCE - force a full time stamp. 432a389d86fSSteven Rostedt (VMware) */ 433a389d86fSSteven Rostedt (VMware) enum { 4347c4b4a51SSteven Rostedt (VMware) RB_ADD_STAMP_NONE = 0, 4357c4b4a51SSteven Rostedt (VMware) RB_ADD_STAMP_EXTEND = BIT(1), 4367c4b4a51SSteven Rostedt (VMware) RB_ADD_STAMP_ABSOLUTE = BIT(2), 4377c4b4a51SSteven Rostedt (VMware) RB_ADD_STAMP_FORCE = BIT(3) 438a389d86fSSteven Rostedt (VMware) }; 439a389d86fSSteven Rostedt (VMware) /* 440a497adb4SSteven Rostedt (Red Hat) * Used for which event context the event is in. 441a497adb4SSteven Rostedt (Red Hat) * NMI = 0 442a497adb4SSteven Rostedt (Red Hat) * IRQ = 1 443a497adb4SSteven Rostedt (Red Hat) * SOFTIRQ = 2 444a497adb4SSteven Rostedt (Red Hat) * NORMAL = 3 445a497adb4SSteven Rostedt (Red Hat) * 446a497adb4SSteven Rostedt (Red Hat) * See trace_recursive_lock() comment below for more details. 447a497adb4SSteven Rostedt (Red Hat) */ 448a497adb4SSteven Rostedt (Red Hat) enum { 449a497adb4SSteven Rostedt (Red Hat) RB_CTX_NMI, 450a497adb4SSteven Rostedt (Red Hat) RB_CTX_IRQ, 451a497adb4SSteven Rostedt (Red Hat) RB_CTX_SOFTIRQ, 452a497adb4SSteven Rostedt (Red Hat) RB_CTX_NORMAL, 453a497adb4SSteven Rostedt (Red Hat) RB_CTX_MAX 454a497adb4SSteven Rostedt (Red Hat) }; 455a497adb4SSteven Rostedt (Red Hat) 45610464b4aSSteven Rostedt (VMware) #if BITS_PER_LONG == 32 45710464b4aSSteven Rostedt (VMware) #define RB_TIME_32 45810464b4aSSteven Rostedt (VMware) #endif 45910464b4aSSteven Rostedt (VMware) 46010464b4aSSteven Rostedt (VMware) /* To test on 64 bit machines */ 46110464b4aSSteven Rostedt (VMware) //#define RB_TIME_32 46210464b4aSSteven Rostedt (VMware) 46310464b4aSSteven Rostedt (VMware) #ifdef RB_TIME_32 46410464b4aSSteven Rostedt (VMware) 46510464b4aSSteven Rostedt (VMware) struct rb_time_struct { 46610464b4aSSteven Rostedt (VMware) local_t cnt; 46710464b4aSSteven Rostedt (VMware) local_t top; 46810464b4aSSteven Rostedt (VMware) local_t bottom; 46910464b4aSSteven Rostedt (VMware) }; 47010464b4aSSteven Rostedt (VMware) #else 47110464b4aSSteven Rostedt (VMware) #include <asm/local64.h> 47210464b4aSSteven Rostedt (VMware) struct rb_time_struct { 47310464b4aSSteven Rostedt (VMware) local64_t time; 47410464b4aSSteven Rostedt (VMware) }; 47510464b4aSSteven Rostedt (VMware) #endif 47610464b4aSSteven Rostedt (VMware) typedef struct rb_time_struct rb_time_t; 47710464b4aSSteven Rostedt (VMware) 478a497adb4SSteven Rostedt (Red Hat) /* 4797a8e76a3SSteven Rostedt * head_page == tail_page && head == tail then buffer is empty. 4807a8e76a3SSteven Rostedt */ 4817a8e76a3SSteven Rostedt struct ring_buffer_per_cpu { 4827a8e76a3SSteven Rostedt int cpu; 483985023deSRichard Kennedy atomic_t record_disabled; 48407b8b10eSSteven Rostedt (VMware) atomic_t resize_disabled; 48513292494SSteven Rostedt (VMware) struct trace_buffer *buffer; 4865389f6faSThomas Gleixner raw_spinlock_t reader_lock; /* serialize readers */ 487445c8951SThomas Gleixner arch_spinlock_t lock; 4887a8e76a3SSteven Rostedt struct lock_class_key lock_key; 48973a757e6SSteven Rostedt (VMware) struct buffer_data_page *free_page; 4909b94a8fbSSteven Rostedt (Red Hat) unsigned long nr_pages; 49158a09ec6SSteven Rostedt (Red Hat) unsigned int current_context; 4923adc54faSSteven Rostedt struct list_head *pages; 4936f807acdSSteven Rostedt struct buffer_page *head_page; /* read from head */ 4946f807acdSSteven Rostedt struct buffer_page *tail_page; /* write to tail */ 495c3706f00SWenji Huang struct buffer_page *commit_page; /* committed pages */ 496d769041fSSteven Rostedt struct buffer_page *reader_page; 49766a8cb95SSteven Rostedt unsigned long lost_events; 49866a8cb95SSteven Rostedt unsigned long last_overrun; 4998e012066SSteven Rostedt (VMware) unsigned long nest; 500c64e148aSVaibhav Nagarnaik local_t entries_bytes; 501e4906effSSteven Rostedt local_t entries; 502884bfe89SSlava Pestov local_t overrun; 503884bfe89SSlava Pestov local_t commit_overrun; 504884bfe89SSlava Pestov local_t dropped_events; 505fa743953SSteven Rostedt local_t committing; 506fa743953SSteven Rostedt local_t commits; 5072c2b0a78SSteven Rostedt (VMware) local_t pages_touched; 5082c2b0a78SSteven Rostedt (VMware) local_t pages_read; 50903329f99SSteven Rostedt (VMware) long last_pages_touch; 5102c2b0a78SSteven Rostedt (VMware) size_t shortest_full; 51177ae365eSSteven Rostedt unsigned long read; 512c64e148aSVaibhav Nagarnaik unsigned long read_bytes; 51310464b4aSSteven Rostedt (VMware) rb_time_t write_stamp; 51410464b4aSSteven Rostedt (VMware) rb_time_t before_stamp; 5157a8e76a3SSteven Rostedt u64 read_stamp; 516438ced17SVaibhav Nagarnaik /* ring buffer pages to update, > 0 to add, < 0 to remove */ 5179b94a8fbSSteven Rostedt (Red Hat) long nr_pages_to_update; 518438ced17SVaibhav Nagarnaik struct list_head new_pages; /* new pages to add */ 51983f40318SVaibhav Nagarnaik struct work_struct update_pages_work; 52005fdd70dSVaibhav Nagarnaik struct completion update_done; 52115693458SSteven Rostedt (Red Hat) 52215693458SSteven Rostedt (Red Hat) struct rb_irq_work irq_work; 5237a8e76a3SSteven Rostedt }; 5247a8e76a3SSteven Rostedt 52513292494SSteven Rostedt (VMware) struct trace_buffer { 5267a8e76a3SSteven Rostedt unsigned flags; 5277a8e76a3SSteven Rostedt int cpus; 5287a8e76a3SSteven Rostedt atomic_t record_disabled; 52900f62f61SArnaldo Carvalho de Melo cpumask_var_t cpumask; 5307a8e76a3SSteven Rostedt 5311f8a6a10SPeter Zijlstra struct lock_class_key *reader_lock_key; 5321f8a6a10SPeter Zijlstra 5337a8e76a3SSteven Rostedt struct mutex mutex; 5347a8e76a3SSteven Rostedt 5357a8e76a3SSteven Rostedt struct ring_buffer_per_cpu **buffers; 536554f786eSSteven Rostedt 537b32614c0SSebastian Andrzej Siewior struct hlist_node node; 53837886f6aSSteven Rostedt u64 (*clock)(void); 53915693458SSteven Rostedt (Red Hat) 54015693458SSteven Rostedt (Red Hat) struct rb_irq_work irq_work; 54100b41452STom Zanussi bool time_stamp_abs; 5427a8e76a3SSteven Rostedt }; 5437a8e76a3SSteven Rostedt 5447a8e76a3SSteven Rostedt struct ring_buffer_iter { 5457a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 5467a8e76a3SSteven Rostedt unsigned long head; 547785888c5SSteven Rostedt (VMware) unsigned long next_event; 5487a8e76a3SSteven Rostedt struct buffer_page *head_page; 549492a74f4SSteven Rostedt struct buffer_page *cache_reader_page; 550492a74f4SSteven Rostedt unsigned long cache_read; 5517a8e76a3SSteven Rostedt u64 read_stamp; 55228e3fc56SSteven Rostedt (VMware) u64 page_stamp; 553785888c5SSteven Rostedt (VMware) struct ring_buffer_event *event; 554c9b7a4a7SSteven Rostedt (VMware) int missed_events; 5557a8e76a3SSteven Rostedt }; 5567a8e76a3SSteven Rostedt 55710464b4aSSteven Rostedt (VMware) #ifdef RB_TIME_32 55810464b4aSSteven Rostedt (VMware) 55910464b4aSSteven Rostedt (VMware) /* 56010464b4aSSteven Rostedt (VMware) * On 32 bit machines, local64_t is very expensive. As the ring 56110464b4aSSteven Rostedt (VMware) * buffer doesn't need all the features of a true 64 bit atomic, 56210464b4aSSteven Rostedt (VMware) * on 32 bit, it uses these functions (64 still uses local64_t). 56310464b4aSSteven Rostedt (VMware) * 56410464b4aSSteven Rostedt (VMware) * For the ring buffer, 64 bit required operations for the time is 56510464b4aSSteven Rostedt (VMware) * the following: 56610464b4aSSteven Rostedt (VMware) * 56710464b4aSSteven Rostedt (VMware) * - Only need 59 bits (uses 60 to make it even). 56810464b4aSSteven Rostedt (VMware) * - Reads may fail if it interrupted a modification of the time stamp. 56910464b4aSSteven Rostedt (VMware) * It will succeed if it did not interrupt another write even if 57010464b4aSSteven Rostedt (VMware) * the read itself is interrupted by a write. 57110464b4aSSteven Rostedt (VMware) * It returns whether it was successful or not. 57210464b4aSSteven Rostedt (VMware) * 57310464b4aSSteven Rostedt (VMware) * - Writes always succeed and will overwrite other writes and writes 57410464b4aSSteven Rostedt (VMware) * that were done by events interrupting the current write. 57510464b4aSSteven Rostedt (VMware) * 57610464b4aSSteven Rostedt (VMware) * - A write followed by a read of the same time stamp will always succeed, 57710464b4aSSteven Rostedt (VMware) * but may not contain the same value. 57810464b4aSSteven Rostedt (VMware) * 57910464b4aSSteven Rostedt (VMware) * - A cmpxchg will fail if it interrupted another write or cmpxchg. 58010464b4aSSteven Rostedt (VMware) * Other than that, it acts like a normal cmpxchg. 58110464b4aSSteven Rostedt (VMware) * 58210464b4aSSteven Rostedt (VMware) * The 60 bit time stamp is broken up by 30 bits in a top and bottom half 58310464b4aSSteven Rostedt (VMware) * (bottom being the least significant 30 bits of the 60 bit time stamp). 58410464b4aSSteven Rostedt (VMware) * 58510464b4aSSteven Rostedt (VMware) * The two most significant bits of each half holds a 2 bit counter (0-3). 58610464b4aSSteven Rostedt (VMware) * Each update will increment this counter by one. 58710464b4aSSteven Rostedt (VMware) * When reading the top and bottom, if the two counter bits match then the 58810464b4aSSteven Rostedt (VMware) * top and bottom together make a valid 60 bit number. 58910464b4aSSteven Rostedt (VMware) */ 59010464b4aSSteven Rostedt (VMware) #define RB_TIME_SHIFT 30 59110464b4aSSteven Rostedt (VMware) #define RB_TIME_VAL_MASK ((1 << RB_TIME_SHIFT) - 1) 59210464b4aSSteven Rostedt (VMware) 59310464b4aSSteven Rostedt (VMware) static inline int rb_time_cnt(unsigned long val) 59410464b4aSSteven Rostedt (VMware) { 59510464b4aSSteven Rostedt (VMware) return (val >> RB_TIME_SHIFT) & 3; 59610464b4aSSteven Rostedt (VMware) } 59710464b4aSSteven Rostedt (VMware) 59810464b4aSSteven Rostedt (VMware) static inline u64 rb_time_val(unsigned long top, unsigned long bottom) 59910464b4aSSteven Rostedt (VMware) { 60010464b4aSSteven Rostedt (VMware) u64 val; 60110464b4aSSteven Rostedt (VMware) 60210464b4aSSteven Rostedt (VMware) val = top & RB_TIME_VAL_MASK; 60310464b4aSSteven Rostedt (VMware) val <<= RB_TIME_SHIFT; 60410464b4aSSteven Rostedt (VMware) val |= bottom & RB_TIME_VAL_MASK; 60510464b4aSSteven Rostedt (VMware) 60610464b4aSSteven Rostedt (VMware) return val; 60710464b4aSSteven Rostedt (VMware) } 60810464b4aSSteven Rostedt (VMware) 60910464b4aSSteven Rostedt (VMware) static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) 61010464b4aSSteven Rostedt (VMware) { 61110464b4aSSteven Rostedt (VMware) unsigned long top, bottom; 61210464b4aSSteven Rostedt (VMware) unsigned long c; 61310464b4aSSteven Rostedt (VMware) 61410464b4aSSteven Rostedt (VMware) /* 61510464b4aSSteven Rostedt (VMware) * If the read is interrupted by a write, then the cnt will 61610464b4aSSteven Rostedt (VMware) * be different. Loop until both top and bottom have been read 61710464b4aSSteven Rostedt (VMware) * without interruption. 61810464b4aSSteven Rostedt (VMware) */ 61910464b4aSSteven Rostedt (VMware) do { 62010464b4aSSteven Rostedt (VMware) c = local_read(&t->cnt); 62110464b4aSSteven Rostedt (VMware) top = local_read(&t->top); 62210464b4aSSteven Rostedt (VMware) bottom = local_read(&t->bottom); 62310464b4aSSteven Rostedt (VMware) } while (c != local_read(&t->cnt)); 62410464b4aSSteven Rostedt (VMware) 62510464b4aSSteven Rostedt (VMware) *cnt = rb_time_cnt(top); 62610464b4aSSteven Rostedt (VMware) 62710464b4aSSteven Rostedt (VMware) /* If top and bottom counts don't match, this interrupted a write */ 62810464b4aSSteven Rostedt (VMware) if (*cnt != rb_time_cnt(bottom)) 62910464b4aSSteven Rostedt (VMware) return false; 63010464b4aSSteven Rostedt (VMware) 63110464b4aSSteven Rostedt (VMware) *ret = rb_time_val(top, bottom); 63210464b4aSSteven Rostedt (VMware) return true; 63310464b4aSSteven Rostedt (VMware) } 63410464b4aSSteven Rostedt (VMware) 63510464b4aSSteven Rostedt (VMware) static bool rb_time_read(rb_time_t *t, u64 *ret) 63610464b4aSSteven Rostedt (VMware) { 63710464b4aSSteven Rostedt (VMware) unsigned long cnt; 63810464b4aSSteven Rostedt (VMware) 63910464b4aSSteven Rostedt (VMware) return __rb_time_read(t, ret, &cnt); 64010464b4aSSteven Rostedt (VMware) } 64110464b4aSSteven Rostedt (VMware) 64210464b4aSSteven Rostedt (VMware) static inline unsigned long rb_time_val_cnt(unsigned long val, unsigned long cnt) 64310464b4aSSteven Rostedt (VMware) { 64410464b4aSSteven Rostedt (VMware) return (val & RB_TIME_VAL_MASK) | ((cnt & 3) << RB_TIME_SHIFT); 64510464b4aSSteven Rostedt (VMware) } 64610464b4aSSteven Rostedt (VMware) 64710464b4aSSteven Rostedt (VMware) static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom) 64810464b4aSSteven Rostedt (VMware) { 64910464b4aSSteven Rostedt (VMware) *top = (unsigned long)((val >> RB_TIME_SHIFT) & RB_TIME_VAL_MASK); 65010464b4aSSteven Rostedt (VMware) *bottom = (unsigned long)(val & RB_TIME_VAL_MASK); 65110464b4aSSteven Rostedt (VMware) } 65210464b4aSSteven Rostedt (VMware) 65310464b4aSSteven Rostedt (VMware) static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long cnt) 65410464b4aSSteven Rostedt (VMware) { 65510464b4aSSteven Rostedt (VMware) val = rb_time_val_cnt(val, cnt); 65610464b4aSSteven Rostedt (VMware) local_set(t, val); 65710464b4aSSteven Rostedt (VMware) } 65810464b4aSSteven Rostedt (VMware) 65910464b4aSSteven Rostedt (VMware) static void rb_time_set(rb_time_t *t, u64 val) 66010464b4aSSteven Rostedt (VMware) { 66110464b4aSSteven Rostedt (VMware) unsigned long cnt, top, bottom; 66210464b4aSSteven Rostedt (VMware) 66310464b4aSSteven Rostedt (VMware) rb_time_split(val, &top, &bottom); 66410464b4aSSteven Rostedt (VMware) 66510464b4aSSteven Rostedt (VMware) /* Writes always succeed with a valid number even if it gets interrupted. */ 66610464b4aSSteven Rostedt (VMware) do { 66710464b4aSSteven Rostedt (VMware) cnt = local_inc_return(&t->cnt); 66810464b4aSSteven Rostedt (VMware) rb_time_val_set(&t->top, top, cnt); 66910464b4aSSteven Rostedt (VMware) rb_time_val_set(&t->bottom, bottom, cnt); 67010464b4aSSteven Rostedt (VMware) } while (cnt != local_read(&t->cnt)); 67110464b4aSSteven Rostedt (VMware) } 67210464b4aSSteven Rostedt (VMware) 67310464b4aSSteven Rostedt (VMware) static inline bool 67410464b4aSSteven Rostedt (VMware) rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set) 67510464b4aSSteven Rostedt (VMware) { 67610464b4aSSteven Rostedt (VMware) unsigned long ret; 67710464b4aSSteven Rostedt (VMware) 67810464b4aSSteven Rostedt (VMware) ret = local_cmpxchg(l, expect, set); 67910464b4aSSteven Rostedt (VMware) return ret == expect; 68010464b4aSSteven Rostedt (VMware) } 68110464b4aSSteven Rostedt (VMware) 68210464b4aSSteven Rostedt (VMware) static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) 68310464b4aSSteven Rostedt (VMware) { 68410464b4aSSteven Rostedt (VMware) unsigned long cnt, top, bottom; 68510464b4aSSteven Rostedt (VMware) unsigned long cnt2, top2, bottom2; 68610464b4aSSteven Rostedt (VMware) u64 val; 68710464b4aSSteven Rostedt (VMware) 68810464b4aSSteven Rostedt (VMware) /* The cmpxchg always fails if it interrupted an update */ 68910464b4aSSteven Rostedt (VMware) if (!__rb_time_read(t, &val, &cnt2)) 69010464b4aSSteven Rostedt (VMware) return false; 69110464b4aSSteven Rostedt (VMware) 69210464b4aSSteven Rostedt (VMware) if (val != expect) 69310464b4aSSteven Rostedt (VMware) return false; 69410464b4aSSteven Rostedt (VMware) 69510464b4aSSteven Rostedt (VMware) cnt = local_read(&t->cnt); 69610464b4aSSteven Rostedt (VMware) if ((cnt & 3) != cnt2) 69710464b4aSSteven Rostedt (VMware) return false; 69810464b4aSSteven Rostedt (VMware) 69910464b4aSSteven Rostedt (VMware) cnt2 = cnt + 1; 70010464b4aSSteven Rostedt (VMware) 70110464b4aSSteven Rostedt (VMware) rb_time_split(val, &top, &bottom); 70210464b4aSSteven Rostedt (VMware) top = rb_time_val_cnt(top, cnt); 70310464b4aSSteven Rostedt (VMware) bottom = rb_time_val_cnt(bottom, cnt); 70410464b4aSSteven Rostedt (VMware) 70510464b4aSSteven Rostedt (VMware) rb_time_split(set, &top2, &bottom2); 70610464b4aSSteven Rostedt (VMware) top2 = rb_time_val_cnt(top2, cnt2); 70710464b4aSSteven Rostedt (VMware) bottom2 = rb_time_val_cnt(bottom2, cnt2); 70810464b4aSSteven Rostedt (VMware) 70910464b4aSSteven Rostedt (VMware) if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2)) 71010464b4aSSteven Rostedt (VMware) return false; 71110464b4aSSteven Rostedt (VMware) if (!rb_time_read_cmpxchg(&t->top, top, top2)) 71210464b4aSSteven Rostedt (VMware) return false; 71310464b4aSSteven Rostedt (VMware) if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2)) 71410464b4aSSteven Rostedt (VMware) return false; 71510464b4aSSteven Rostedt (VMware) return true; 71610464b4aSSteven Rostedt (VMware) } 71710464b4aSSteven Rostedt (VMware) 71810464b4aSSteven Rostedt (VMware) #else /* 64 bits */ 71910464b4aSSteven Rostedt (VMware) 72010464b4aSSteven Rostedt (VMware) /* local64_t always succeeds */ 72110464b4aSSteven Rostedt (VMware) 72210464b4aSSteven Rostedt (VMware) static inline bool rb_time_read(rb_time_t *t, u64 *ret) 72310464b4aSSteven Rostedt (VMware) { 72410464b4aSSteven Rostedt (VMware) *ret = local64_read(&t->time); 72510464b4aSSteven Rostedt (VMware) return true; 72610464b4aSSteven Rostedt (VMware) } 72710464b4aSSteven Rostedt (VMware) static void rb_time_set(rb_time_t *t, u64 val) 72810464b4aSSteven Rostedt (VMware) { 72910464b4aSSteven Rostedt (VMware) local64_set(&t->time, val); 73010464b4aSSteven Rostedt (VMware) } 73110464b4aSSteven Rostedt (VMware) 73210464b4aSSteven Rostedt (VMware) static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) 73310464b4aSSteven Rostedt (VMware) { 73410464b4aSSteven Rostedt (VMware) u64 val; 73510464b4aSSteven Rostedt (VMware) val = local64_cmpxchg(&t->time, expect, set); 73610464b4aSSteven Rostedt (VMware) return val == expect; 73710464b4aSSteven Rostedt (VMware) } 73810464b4aSSteven Rostedt (VMware) #endif 73910464b4aSSteven Rostedt (VMware) 7402c2b0a78SSteven Rostedt (VMware) /** 7412c2b0a78SSteven Rostedt (VMware) * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer 7422c2b0a78SSteven Rostedt (VMware) * @buffer: The ring_buffer to get the number of pages from 7432c2b0a78SSteven Rostedt (VMware) * @cpu: The cpu of the ring_buffer to get the number of pages from 7442c2b0a78SSteven Rostedt (VMware) * 7452c2b0a78SSteven Rostedt (VMware) * Returns the number of pages used by a per_cpu buffer of the ring buffer. 7462c2b0a78SSteven Rostedt (VMware) */ 74713292494SSteven Rostedt (VMware) size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) 7482c2b0a78SSteven Rostedt (VMware) { 7492c2b0a78SSteven Rostedt (VMware) return buffer->buffers[cpu]->nr_pages; 7502c2b0a78SSteven Rostedt (VMware) } 7512c2b0a78SSteven Rostedt (VMware) 7522c2b0a78SSteven Rostedt (VMware) /** 7532c2b0a78SSteven Rostedt (VMware) * ring_buffer_nr_pages_dirty - get the number of used pages in the ring buffer 7542c2b0a78SSteven Rostedt (VMware) * @buffer: The ring_buffer to get the number of pages from 7552c2b0a78SSteven Rostedt (VMware) * @cpu: The cpu of the ring_buffer to get the number of pages from 7562c2b0a78SSteven Rostedt (VMware) * 7572c2b0a78SSteven Rostedt (VMware) * Returns the number of pages that have content in the ring buffer. 7582c2b0a78SSteven Rostedt (VMware) */ 75913292494SSteven Rostedt (VMware) size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) 7602c2b0a78SSteven Rostedt (VMware) { 7612c2b0a78SSteven Rostedt (VMware) size_t read; 7622c2b0a78SSteven Rostedt (VMware) size_t cnt; 7632c2b0a78SSteven Rostedt (VMware) 7642c2b0a78SSteven Rostedt (VMware) read = local_read(&buffer->buffers[cpu]->pages_read); 7652c2b0a78SSteven Rostedt (VMware) cnt = local_read(&buffer->buffers[cpu]->pages_touched); 7662c2b0a78SSteven Rostedt (VMware) /* The reader can read an empty page, but not more than that */ 7672c2b0a78SSteven Rostedt (VMware) if (cnt < read) { 7682c2b0a78SSteven Rostedt (VMware) WARN_ON_ONCE(read > cnt + 1); 7692c2b0a78SSteven Rostedt (VMware) return 0; 7702c2b0a78SSteven Rostedt (VMware) } 7712c2b0a78SSteven Rostedt (VMware) 7722c2b0a78SSteven Rostedt (VMware) return cnt - read; 7732c2b0a78SSteven Rostedt (VMware) } 7742c2b0a78SSteven Rostedt (VMware) 77515693458SSteven Rostedt (Red Hat) /* 77615693458SSteven Rostedt (Red Hat) * rb_wake_up_waiters - wake up tasks waiting for ring buffer input 77715693458SSteven Rostedt (Red Hat) * 77815693458SSteven Rostedt (Red Hat) * Schedules a delayed work to wake up any task that is blocked on the 77915693458SSteven Rostedt (Red Hat) * ring buffer waiters queue. 78015693458SSteven Rostedt (Red Hat) */ 78115693458SSteven Rostedt (Red Hat) static void rb_wake_up_waiters(struct irq_work *work) 78215693458SSteven Rostedt (Red Hat) { 78315693458SSteven Rostedt (Red Hat) struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); 78415693458SSteven Rostedt (Red Hat) 78515693458SSteven Rostedt (Red Hat) wake_up_all(&rbwork->waiters); 7861e0d6714SSteven Rostedt (Red Hat) if (rbwork->wakeup_full) { 7871e0d6714SSteven Rostedt (Red Hat) rbwork->wakeup_full = false; 7881e0d6714SSteven Rostedt (Red Hat) wake_up_all(&rbwork->full_waiters); 7891e0d6714SSteven Rostedt (Red Hat) } 79015693458SSteven Rostedt (Red Hat) } 79115693458SSteven Rostedt (Red Hat) 79215693458SSteven Rostedt (Red Hat) /** 79315693458SSteven Rostedt (Red Hat) * ring_buffer_wait - wait for input to the ring buffer 79415693458SSteven Rostedt (Red Hat) * @buffer: buffer to wait on 79515693458SSteven Rostedt (Red Hat) * @cpu: the cpu buffer to wait on 796*e1981f75SQiujun Huang * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS 79715693458SSteven Rostedt (Red Hat) * 79815693458SSteven Rostedt (Red Hat) * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon 79915693458SSteven Rostedt (Red Hat) * as data is added to any of the @buffer's cpu buffers. Otherwise 80015693458SSteven Rostedt (Red Hat) * it will wait for data to be added to a specific cpu buffer. 80115693458SSteven Rostedt (Red Hat) */ 80213292494SSteven Rostedt (VMware) int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) 80315693458SSteven Rostedt (Red Hat) { 8043f649ab7SKees Cook struct ring_buffer_per_cpu *cpu_buffer; 80515693458SSteven Rostedt (Red Hat) DEFINE_WAIT(wait); 80615693458SSteven Rostedt (Red Hat) struct rb_irq_work *work; 807e30f53aaSRabin Vincent int ret = 0; 80815693458SSteven Rostedt (Red Hat) 80915693458SSteven Rostedt (Red Hat) /* 81015693458SSteven Rostedt (Red Hat) * Depending on what the caller is waiting for, either any 81115693458SSteven Rostedt (Red Hat) * data in any cpu buffer, or a specific buffer, put the 81215693458SSteven Rostedt (Red Hat) * caller on the appropriate wait queue. 81315693458SSteven Rostedt (Red Hat) */ 8141e0d6714SSteven Rostedt (Red Hat) if (cpu == RING_BUFFER_ALL_CPUS) { 81515693458SSteven Rostedt (Red Hat) work = &buffer->irq_work; 8161e0d6714SSteven Rostedt (Red Hat) /* Full only makes sense on per cpu reads */ 8172c2b0a78SSteven Rostedt (VMware) full = 0; 8181e0d6714SSteven Rostedt (Red Hat) } else { 8198b8b3683SSteven Rostedt (Red Hat) if (!cpumask_test_cpu(cpu, buffer->cpumask)) 8208b8b3683SSteven Rostedt (Red Hat) return -ENODEV; 82115693458SSteven Rostedt (Red Hat) cpu_buffer = buffer->buffers[cpu]; 82215693458SSteven Rostedt (Red Hat) work = &cpu_buffer->irq_work; 82315693458SSteven Rostedt (Red Hat) } 82415693458SSteven Rostedt (Red Hat) 82515693458SSteven Rostedt (Red Hat) 826e30f53aaSRabin Vincent while (true) { 8271e0d6714SSteven Rostedt (Red Hat) if (full) 8281e0d6714SSteven Rostedt (Red Hat) prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); 8291e0d6714SSteven Rostedt (Red Hat) else 83015693458SSteven Rostedt (Red Hat) prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); 83115693458SSteven Rostedt (Red Hat) 83215693458SSteven Rostedt (Red Hat) /* 83315693458SSteven Rostedt (Red Hat) * The events can happen in critical sections where 83415693458SSteven Rostedt (Red Hat) * checking a work queue can cause deadlocks. 83515693458SSteven Rostedt (Red Hat) * After adding a task to the queue, this flag is set 83615693458SSteven Rostedt (Red Hat) * only to notify events to try to wake up the queue 83715693458SSteven Rostedt (Red Hat) * using irq_work. 83815693458SSteven Rostedt (Red Hat) * 83915693458SSteven Rostedt (Red Hat) * We don't clear it even if the buffer is no longer 84015693458SSteven Rostedt (Red Hat) * empty. The flag only causes the next event to run 84115693458SSteven Rostedt (Red Hat) * irq_work to do the work queue wake up. The worse 84215693458SSteven Rostedt (Red Hat) * that can happen if we race with !trace_empty() is that 84315693458SSteven Rostedt (Red Hat) * an event will cause an irq_work to try to wake up 84415693458SSteven Rostedt (Red Hat) * an empty queue. 84515693458SSteven Rostedt (Red Hat) * 84615693458SSteven Rostedt (Red Hat) * There's no reason to protect this flag either, as 84715693458SSteven Rostedt (Red Hat) * the work queue and irq_work logic will do the necessary 84815693458SSteven Rostedt (Red Hat) * synchronization for the wake ups. The only thing 84915693458SSteven Rostedt (Red Hat) * that is necessary is that the wake up happens after 85015693458SSteven Rostedt (Red Hat) * a task has been queued. It's OK for spurious wake ups. 85115693458SSteven Rostedt (Red Hat) */ 8521e0d6714SSteven Rostedt (Red Hat) if (full) 8531e0d6714SSteven Rostedt (Red Hat) work->full_waiters_pending = true; 8541e0d6714SSteven Rostedt (Red Hat) else 85515693458SSteven Rostedt (Red Hat) work->waiters_pending = true; 85615693458SSteven Rostedt (Red Hat) 857e30f53aaSRabin Vincent if (signal_pending(current)) { 858e30f53aaSRabin Vincent ret = -EINTR; 859e30f53aaSRabin Vincent break; 860e30f53aaSRabin Vincent } 861e30f53aaSRabin Vincent 862e30f53aaSRabin Vincent if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) 863e30f53aaSRabin Vincent break; 864e30f53aaSRabin Vincent 865e30f53aaSRabin Vincent if (cpu != RING_BUFFER_ALL_CPUS && 866e30f53aaSRabin Vincent !ring_buffer_empty_cpu(buffer, cpu)) { 867e30f53aaSRabin Vincent unsigned long flags; 868e30f53aaSRabin Vincent bool pagebusy; 8692c2b0a78SSteven Rostedt (VMware) size_t nr_pages; 8702c2b0a78SSteven Rostedt (VMware) size_t dirty; 871e30f53aaSRabin Vincent 872e30f53aaSRabin Vincent if (!full) 873e30f53aaSRabin Vincent break; 874e30f53aaSRabin Vincent 875e30f53aaSRabin Vincent raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 876e30f53aaSRabin Vincent pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; 8772c2b0a78SSteven Rostedt (VMware) nr_pages = cpu_buffer->nr_pages; 8782c2b0a78SSteven Rostedt (VMware) dirty = ring_buffer_nr_dirty_pages(buffer, cpu); 8792c2b0a78SSteven Rostedt (VMware) if (!cpu_buffer->shortest_full || 8802c2b0a78SSteven Rostedt (VMware) cpu_buffer->shortest_full < full) 8812c2b0a78SSteven Rostedt (VMware) cpu_buffer->shortest_full = full; 882e30f53aaSRabin Vincent raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 8832c2b0a78SSteven Rostedt (VMware) if (!pagebusy && 8842c2b0a78SSteven Rostedt (VMware) (!nr_pages || (dirty * 100) > full * nr_pages)) 885e30f53aaSRabin Vincent break; 886e30f53aaSRabin Vincent } 887e30f53aaSRabin Vincent 88815693458SSteven Rostedt (Red Hat) schedule(); 889e30f53aaSRabin Vincent } 89015693458SSteven Rostedt (Red Hat) 8911e0d6714SSteven Rostedt (Red Hat) if (full) 8921e0d6714SSteven Rostedt (Red Hat) finish_wait(&work->full_waiters, &wait); 8931e0d6714SSteven Rostedt (Red Hat) else 89415693458SSteven Rostedt (Red Hat) finish_wait(&work->waiters, &wait); 895e30f53aaSRabin Vincent 896e30f53aaSRabin Vincent return ret; 89715693458SSteven Rostedt (Red Hat) } 89815693458SSteven Rostedt (Red Hat) 89915693458SSteven Rostedt (Red Hat) /** 90015693458SSteven Rostedt (Red Hat) * ring_buffer_poll_wait - poll on buffer input 90115693458SSteven Rostedt (Red Hat) * @buffer: buffer to wait on 90215693458SSteven Rostedt (Red Hat) * @cpu: the cpu buffer to wait on 90315693458SSteven Rostedt (Red Hat) * @filp: the file descriptor 90415693458SSteven Rostedt (Red Hat) * @poll_table: The poll descriptor 90515693458SSteven Rostedt (Red Hat) * 90615693458SSteven Rostedt (Red Hat) * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon 90715693458SSteven Rostedt (Red Hat) * as data is added to any of the @buffer's cpu buffers. Otherwise 90815693458SSteven Rostedt (Red Hat) * it will wait for data to be added to a specific cpu buffer. 90915693458SSteven Rostedt (Red Hat) * 910a9a08845SLinus Torvalds * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers, 91115693458SSteven Rostedt (Red Hat) * zero otherwise. 91215693458SSteven Rostedt (Red Hat) */ 91313292494SSteven Rostedt (VMware) __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, 91415693458SSteven Rostedt (Red Hat) struct file *filp, poll_table *poll_table) 91515693458SSteven Rostedt (Red Hat) { 91615693458SSteven Rostedt (Red Hat) struct ring_buffer_per_cpu *cpu_buffer; 91715693458SSteven Rostedt (Red Hat) struct rb_irq_work *work; 91815693458SSteven Rostedt (Red Hat) 91915693458SSteven Rostedt (Red Hat) if (cpu == RING_BUFFER_ALL_CPUS) 92015693458SSteven Rostedt (Red Hat) work = &buffer->irq_work; 92115693458SSteven Rostedt (Red Hat) else { 9226721cb60SSteven Rostedt (Red Hat) if (!cpumask_test_cpu(cpu, buffer->cpumask)) 9236721cb60SSteven Rostedt (Red Hat) return -EINVAL; 9246721cb60SSteven Rostedt (Red Hat) 92515693458SSteven Rostedt (Red Hat) cpu_buffer = buffer->buffers[cpu]; 92615693458SSteven Rostedt (Red Hat) work = &cpu_buffer->irq_work; 92715693458SSteven Rostedt (Red Hat) } 92815693458SSteven Rostedt (Red Hat) 92915693458SSteven Rostedt (Red Hat) poll_wait(filp, &work->waiters, poll_table); 9304ce97dbfSJosef Bacik work->waiters_pending = true; 9314ce97dbfSJosef Bacik /* 9324ce97dbfSJosef Bacik * There's a tight race between setting the waiters_pending and 9334ce97dbfSJosef Bacik * checking if the ring buffer is empty. Once the waiters_pending bit 9344ce97dbfSJosef Bacik * is set, the next event will wake the task up, but we can get stuck 9354ce97dbfSJosef Bacik * if there's only a single event in. 9364ce97dbfSJosef Bacik * 9374ce97dbfSJosef Bacik * FIXME: Ideally, we need a memory barrier on the writer side as well, 9384ce97dbfSJosef Bacik * but adding a memory barrier to all events will cause too much of a 9394ce97dbfSJosef Bacik * performance hit in the fast path. We only need a memory barrier when 9404ce97dbfSJosef Bacik * the buffer goes from empty to having content. But as this race is 9414ce97dbfSJosef Bacik * extremely small, and it's not a problem if another event comes in, we 9424ce97dbfSJosef Bacik * will fix it later. 9434ce97dbfSJosef Bacik */ 9444ce97dbfSJosef Bacik smp_mb(); 94515693458SSteven Rostedt (Red Hat) 94615693458SSteven Rostedt (Red Hat) if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || 94715693458SSteven Rostedt (Red Hat) (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) 948a9a08845SLinus Torvalds return EPOLLIN | EPOLLRDNORM; 94915693458SSteven Rostedt (Red Hat) return 0; 95015693458SSteven Rostedt (Red Hat) } 95115693458SSteven Rostedt (Red Hat) 952f536aafcSSteven Rostedt /* buffer may be either ring_buffer or ring_buffer_per_cpu */ 953077c5407SSteven Rostedt #define RB_WARN_ON(b, cond) \ 9543e89c7bbSSteven Rostedt ({ \ 9553e89c7bbSSteven Rostedt int _____ret = unlikely(cond); \ 9563e89c7bbSSteven Rostedt if (_____ret) { \ 957077c5407SSteven Rostedt if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ 958077c5407SSteven Rostedt struct ring_buffer_per_cpu *__b = \ 959077c5407SSteven Rostedt (void *)b; \ 960077c5407SSteven Rostedt atomic_inc(&__b->buffer->record_disabled); \ 961077c5407SSteven Rostedt } else \ 962077c5407SSteven Rostedt atomic_inc(&b->record_disabled); \ 963bf41a158SSteven Rostedt WARN_ON(1); \ 964bf41a158SSteven Rostedt } \ 9653e89c7bbSSteven Rostedt _____ret; \ 9663e89c7bbSSteven Rostedt }) 967f536aafcSSteven Rostedt 96837886f6aSSteven Rostedt /* Up this if you want to test the TIME_EXTENTS and normalization */ 96937886f6aSSteven Rostedt #define DEBUG_SHIFT 0 97037886f6aSSteven Rostedt 97113292494SSteven Rostedt (VMware) static inline u64 rb_time_stamp(struct trace_buffer *buffer) 97288eb0125SSteven Rostedt { 973bbeba3e5SSteven Rostedt (VMware) u64 ts; 974bbeba3e5SSteven Rostedt (VMware) 975bbeba3e5SSteven Rostedt (VMware) /* Skip retpolines :-( */ 976bbeba3e5SSteven Rostedt (VMware) if (IS_ENABLED(CONFIG_RETPOLINE) && likely(buffer->clock == trace_clock_local)) 977bbeba3e5SSteven Rostedt (VMware) ts = trace_clock_local(); 978bbeba3e5SSteven Rostedt (VMware) else 979bbeba3e5SSteven Rostedt (VMware) ts = buffer->clock(); 980bbeba3e5SSteven Rostedt (VMware) 98188eb0125SSteven Rostedt /* shift to debug/test normalization and TIME_EXTENTS */ 982bbeba3e5SSteven Rostedt (VMware) return ts << DEBUG_SHIFT; 98388eb0125SSteven Rostedt } 98488eb0125SSteven Rostedt 98513292494SSteven Rostedt (VMware) u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu) 98637886f6aSSteven Rostedt { 98737886f6aSSteven Rostedt u64 time; 98837886f6aSSteven Rostedt 98937886f6aSSteven Rostedt preempt_disable_notrace(); 9906d3f1e12SJiri Olsa time = rb_time_stamp(buffer); 991d6097c9eSPeter Zijlstra preempt_enable_notrace(); 99237886f6aSSteven Rostedt 99337886f6aSSteven Rostedt return time; 99437886f6aSSteven Rostedt } 99537886f6aSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); 99637886f6aSSteven Rostedt 99713292494SSteven Rostedt (VMware) void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, 99837886f6aSSteven Rostedt int cpu, u64 *ts) 99937886f6aSSteven Rostedt { 100037886f6aSSteven Rostedt /* Just stupid testing the normalize function and deltas */ 100137886f6aSSteven Rostedt *ts >>= DEBUG_SHIFT; 100237886f6aSSteven Rostedt } 100337886f6aSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 100437886f6aSSteven Rostedt 100577ae365eSSteven Rostedt /* 100677ae365eSSteven Rostedt * Making the ring buffer lockless makes things tricky. 100777ae365eSSteven Rostedt * Although writes only happen on the CPU that they are on, 100877ae365eSSteven Rostedt * and they only need to worry about interrupts. Reads can 100977ae365eSSteven Rostedt * happen on any CPU. 101077ae365eSSteven Rostedt * 101177ae365eSSteven Rostedt * The reader page is always off the ring buffer, but when the 101277ae365eSSteven Rostedt * reader finishes with a page, it needs to swap its page with 101377ae365eSSteven Rostedt * a new one from the buffer. The reader needs to take from 101477ae365eSSteven Rostedt * the head (writes go to the tail). But if a writer is in overwrite 101577ae365eSSteven Rostedt * mode and wraps, it must push the head page forward. 101677ae365eSSteven Rostedt * 101777ae365eSSteven Rostedt * Here lies the problem. 101877ae365eSSteven Rostedt * 101977ae365eSSteven Rostedt * The reader must be careful to replace only the head page, and 102077ae365eSSteven Rostedt * not another one. As described at the top of the file in the 102177ae365eSSteven Rostedt * ASCII art, the reader sets its old page to point to the next 102277ae365eSSteven Rostedt * page after head. It then sets the page after head to point to 102377ae365eSSteven Rostedt * the old reader page. But if the writer moves the head page 102477ae365eSSteven Rostedt * during this operation, the reader could end up with the tail. 102577ae365eSSteven Rostedt * 102677ae365eSSteven Rostedt * We use cmpxchg to help prevent this race. We also do something 102777ae365eSSteven Rostedt * special with the page before head. We set the LSB to 1. 102877ae365eSSteven Rostedt * 102977ae365eSSteven Rostedt * When the writer must push the page forward, it will clear the 103077ae365eSSteven Rostedt * bit that points to the head page, move the head, and then set 103177ae365eSSteven Rostedt * the bit that points to the new head page. 103277ae365eSSteven Rostedt * 103377ae365eSSteven Rostedt * We also don't want an interrupt coming in and moving the head 103477ae365eSSteven Rostedt * page on another writer. Thus we use the second LSB to catch 103577ae365eSSteven Rostedt * that too. Thus: 103677ae365eSSteven Rostedt * 103777ae365eSSteven Rostedt * head->list->prev->next bit 1 bit 0 103877ae365eSSteven Rostedt * ------- ------- 103977ae365eSSteven Rostedt * Normal page 0 0 104077ae365eSSteven Rostedt * Points to head page 0 1 104177ae365eSSteven Rostedt * New head page 1 0 104277ae365eSSteven Rostedt * 104377ae365eSSteven Rostedt * Note we can not trust the prev pointer of the head page, because: 104477ae365eSSteven Rostedt * 104577ae365eSSteven Rostedt * +----+ +-----+ +-----+ 104677ae365eSSteven Rostedt * | |------>| T |---X--->| N | 104777ae365eSSteven Rostedt * | |<------| | | | 104877ae365eSSteven Rostedt * +----+ +-----+ +-----+ 104977ae365eSSteven Rostedt * ^ ^ | 105077ae365eSSteven Rostedt * | +-----+ | | 105177ae365eSSteven Rostedt * +----------| R |----------+ | 105277ae365eSSteven Rostedt * | |<-----------+ 105377ae365eSSteven Rostedt * +-----+ 105477ae365eSSteven Rostedt * 105577ae365eSSteven Rostedt * Key: ---X--> HEAD flag set in pointer 105677ae365eSSteven Rostedt * T Tail page 105777ae365eSSteven Rostedt * R Reader page 105877ae365eSSteven Rostedt * N Next page 105977ae365eSSteven Rostedt * 106077ae365eSSteven Rostedt * (see __rb_reserve_next() to see where this happens) 106177ae365eSSteven Rostedt * 106277ae365eSSteven Rostedt * What the above shows is that the reader just swapped out 106377ae365eSSteven Rostedt * the reader page with a page in the buffer, but before it 106477ae365eSSteven Rostedt * could make the new header point back to the new page added 106577ae365eSSteven Rostedt * it was preempted by a writer. The writer moved forward onto 106677ae365eSSteven Rostedt * the new page added by the reader and is about to move forward 106777ae365eSSteven Rostedt * again. 106877ae365eSSteven Rostedt * 106977ae365eSSteven Rostedt * You can see, it is legitimate for the previous pointer of 107077ae365eSSteven Rostedt * the head (or any page) not to point back to itself. But only 10716167c205SSteven Rostedt (VMware) * temporarily. 107277ae365eSSteven Rostedt */ 107377ae365eSSteven Rostedt 107477ae365eSSteven Rostedt #define RB_PAGE_NORMAL 0UL 107577ae365eSSteven Rostedt #define RB_PAGE_HEAD 1UL 107677ae365eSSteven Rostedt #define RB_PAGE_UPDATE 2UL 107777ae365eSSteven Rostedt 107877ae365eSSteven Rostedt 107977ae365eSSteven Rostedt #define RB_FLAG_MASK 3UL 108077ae365eSSteven Rostedt 108177ae365eSSteven Rostedt /* PAGE_MOVED is not part of the mask */ 108277ae365eSSteven Rostedt #define RB_PAGE_MOVED 4UL 108377ae365eSSteven Rostedt 108477ae365eSSteven Rostedt /* 108577ae365eSSteven Rostedt * rb_list_head - remove any bit 108677ae365eSSteven Rostedt */ 108777ae365eSSteven Rostedt static struct list_head *rb_list_head(struct list_head *list) 108877ae365eSSteven Rostedt { 108977ae365eSSteven Rostedt unsigned long val = (unsigned long)list; 109077ae365eSSteven Rostedt 109177ae365eSSteven Rostedt return (struct list_head *)(val & ~RB_FLAG_MASK); 109277ae365eSSteven Rostedt } 109377ae365eSSteven Rostedt 109477ae365eSSteven Rostedt /* 10956d3f1e12SJiri Olsa * rb_is_head_page - test if the given page is the head page 109677ae365eSSteven Rostedt * 109777ae365eSSteven Rostedt * Because the reader may move the head_page pointer, we can 109877ae365eSSteven Rostedt * not trust what the head page is (it may be pointing to 109977ae365eSSteven Rostedt * the reader page). But if the next page is a header page, 110077ae365eSSteven Rostedt * its flags will be non zero. 110177ae365eSSteven Rostedt */ 110242b16b3fSJesper Juhl static inline int 110377ae365eSSteven Rostedt rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, 110477ae365eSSteven Rostedt struct buffer_page *page, struct list_head *list) 110577ae365eSSteven Rostedt { 110677ae365eSSteven Rostedt unsigned long val; 110777ae365eSSteven Rostedt 110877ae365eSSteven Rostedt val = (unsigned long)list->next; 110977ae365eSSteven Rostedt 111077ae365eSSteven Rostedt if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) 111177ae365eSSteven Rostedt return RB_PAGE_MOVED; 111277ae365eSSteven Rostedt 111377ae365eSSteven Rostedt return val & RB_FLAG_MASK; 111477ae365eSSteven Rostedt } 111577ae365eSSteven Rostedt 111677ae365eSSteven Rostedt /* 111777ae365eSSteven Rostedt * rb_is_reader_page 111877ae365eSSteven Rostedt * 111977ae365eSSteven Rostedt * The unique thing about the reader page, is that, if the 112077ae365eSSteven Rostedt * writer is ever on it, the previous pointer never points 112177ae365eSSteven Rostedt * back to the reader page. 112277ae365eSSteven Rostedt */ 112306ca3209SYaowei Bai static bool rb_is_reader_page(struct buffer_page *page) 112477ae365eSSteven Rostedt { 112577ae365eSSteven Rostedt struct list_head *list = page->list.prev; 112677ae365eSSteven Rostedt 112777ae365eSSteven Rostedt return rb_list_head(list->next) != &page->list; 112877ae365eSSteven Rostedt } 112977ae365eSSteven Rostedt 113077ae365eSSteven Rostedt /* 113177ae365eSSteven Rostedt * rb_set_list_to_head - set a list_head to be pointing to head. 113277ae365eSSteven Rostedt */ 113377ae365eSSteven Rostedt static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, 113477ae365eSSteven Rostedt struct list_head *list) 113577ae365eSSteven Rostedt { 113677ae365eSSteven Rostedt unsigned long *ptr; 113777ae365eSSteven Rostedt 113877ae365eSSteven Rostedt ptr = (unsigned long *)&list->next; 113977ae365eSSteven Rostedt *ptr |= RB_PAGE_HEAD; 114077ae365eSSteven Rostedt *ptr &= ~RB_PAGE_UPDATE; 114177ae365eSSteven Rostedt } 114277ae365eSSteven Rostedt 114377ae365eSSteven Rostedt /* 114477ae365eSSteven Rostedt * rb_head_page_activate - sets up head page 114577ae365eSSteven Rostedt */ 114677ae365eSSteven Rostedt static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) 114777ae365eSSteven Rostedt { 114877ae365eSSteven Rostedt struct buffer_page *head; 114977ae365eSSteven Rostedt 115077ae365eSSteven Rostedt head = cpu_buffer->head_page; 115177ae365eSSteven Rostedt if (!head) 115277ae365eSSteven Rostedt return; 115377ae365eSSteven Rostedt 115477ae365eSSteven Rostedt /* 115577ae365eSSteven Rostedt * Set the previous list pointer to have the HEAD flag. 115677ae365eSSteven Rostedt */ 115777ae365eSSteven Rostedt rb_set_list_to_head(cpu_buffer, head->list.prev); 115877ae365eSSteven Rostedt } 115977ae365eSSteven Rostedt 116077ae365eSSteven Rostedt static void rb_list_head_clear(struct list_head *list) 116177ae365eSSteven Rostedt { 116277ae365eSSteven Rostedt unsigned long *ptr = (unsigned long *)&list->next; 116377ae365eSSteven Rostedt 116477ae365eSSteven Rostedt *ptr &= ~RB_FLAG_MASK; 116577ae365eSSteven Rostedt } 116677ae365eSSteven Rostedt 116777ae365eSSteven Rostedt /* 11686167c205SSteven Rostedt (VMware) * rb_head_page_deactivate - clears head page ptr (for free list) 116977ae365eSSteven Rostedt */ 117077ae365eSSteven Rostedt static void 117177ae365eSSteven Rostedt rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) 117277ae365eSSteven Rostedt { 117377ae365eSSteven Rostedt struct list_head *hd; 117477ae365eSSteven Rostedt 117577ae365eSSteven Rostedt /* Go through the whole list and clear any pointers found. */ 117677ae365eSSteven Rostedt rb_list_head_clear(cpu_buffer->pages); 117777ae365eSSteven Rostedt 117877ae365eSSteven Rostedt list_for_each(hd, cpu_buffer->pages) 117977ae365eSSteven Rostedt rb_list_head_clear(hd); 118077ae365eSSteven Rostedt } 118177ae365eSSteven Rostedt 118277ae365eSSteven Rostedt static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, 118377ae365eSSteven Rostedt struct buffer_page *head, 118477ae365eSSteven Rostedt struct buffer_page *prev, 118577ae365eSSteven Rostedt int old_flag, int new_flag) 118677ae365eSSteven Rostedt { 118777ae365eSSteven Rostedt struct list_head *list; 118877ae365eSSteven Rostedt unsigned long val = (unsigned long)&head->list; 118977ae365eSSteven Rostedt unsigned long ret; 119077ae365eSSteven Rostedt 119177ae365eSSteven Rostedt list = &prev->list; 119277ae365eSSteven Rostedt 119377ae365eSSteven Rostedt val &= ~RB_FLAG_MASK; 119477ae365eSSteven Rostedt 119508a40816SSteven Rostedt ret = cmpxchg((unsigned long *)&list->next, 119677ae365eSSteven Rostedt val | old_flag, val | new_flag); 119777ae365eSSteven Rostedt 119877ae365eSSteven Rostedt /* check if the reader took the page */ 119977ae365eSSteven Rostedt if ((ret & ~RB_FLAG_MASK) != val) 120077ae365eSSteven Rostedt return RB_PAGE_MOVED; 120177ae365eSSteven Rostedt 120277ae365eSSteven Rostedt return ret & RB_FLAG_MASK; 120377ae365eSSteven Rostedt } 120477ae365eSSteven Rostedt 120577ae365eSSteven Rostedt static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, 120677ae365eSSteven Rostedt struct buffer_page *head, 120777ae365eSSteven Rostedt struct buffer_page *prev, 120877ae365eSSteven Rostedt int old_flag) 120977ae365eSSteven Rostedt { 121077ae365eSSteven Rostedt return rb_head_page_set(cpu_buffer, head, prev, 121177ae365eSSteven Rostedt old_flag, RB_PAGE_UPDATE); 121277ae365eSSteven Rostedt } 121377ae365eSSteven Rostedt 121477ae365eSSteven Rostedt static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, 121577ae365eSSteven Rostedt struct buffer_page *head, 121677ae365eSSteven Rostedt struct buffer_page *prev, 121777ae365eSSteven Rostedt int old_flag) 121877ae365eSSteven Rostedt { 121977ae365eSSteven Rostedt return rb_head_page_set(cpu_buffer, head, prev, 122077ae365eSSteven Rostedt old_flag, RB_PAGE_HEAD); 122177ae365eSSteven Rostedt } 122277ae365eSSteven Rostedt 122377ae365eSSteven Rostedt static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, 122477ae365eSSteven Rostedt struct buffer_page *head, 122577ae365eSSteven Rostedt struct buffer_page *prev, 122677ae365eSSteven Rostedt int old_flag) 122777ae365eSSteven Rostedt { 122877ae365eSSteven Rostedt return rb_head_page_set(cpu_buffer, head, prev, 122977ae365eSSteven Rostedt old_flag, RB_PAGE_NORMAL); 123077ae365eSSteven Rostedt } 123177ae365eSSteven Rostedt 123277ae365eSSteven Rostedt static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 123377ae365eSSteven Rostedt struct buffer_page **bpage) 123477ae365eSSteven Rostedt { 123577ae365eSSteven Rostedt struct list_head *p = rb_list_head((*bpage)->list.next); 123677ae365eSSteven Rostedt 123777ae365eSSteven Rostedt *bpage = list_entry(p, struct buffer_page, list); 123877ae365eSSteven Rostedt } 123977ae365eSSteven Rostedt 124077ae365eSSteven Rostedt static struct buffer_page * 124177ae365eSSteven Rostedt rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) 124277ae365eSSteven Rostedt { 124377ae365eSSteven Rostedt struct buffer_page *head; 124477ae365eSSteven Rostedt struct buffer_page *page; 124577ae365eSSteven Rostedt struct list_head *list; 124677ae365eSSteven Rostedt int i; 124777ae365eSSteven Rostedt 124877ae365eSSteven Rostedt if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) 124977ae365eSSteven Rostedt return NULL; 125077ae365eSSteven Rostedt 125177ae365eSSteven Rostedt /* sanity check */ 125277ae365eSSteven Rostedt list = cpu_buffer->pages; 125377ae365eSSteven Rostedt if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) 125477ae365eSSteven Rostedt return NULL; 125577ae365eSSteven Rostedt 125677ae365eSSteven Rostedt page = head = cpu_buffer->head_page; 125777ae365eSSteven Rostedt /* 125877ae365eSSteven Rostedt * It is possible that the writer moves the header behind 125977ae365eSSteven Rostedt * where we started, and we miss in one loop. 126077ae365eSSteven Rostedt * A second loop should grab the header, but we'll do 126177ae365eSSteven Rostedt * three loops just because I'm paranoid. 126277ae365eSSteven Rostedt */ 126377ae365eSSteven Rostedt for (i = 0; i < 3; i++) { 126477ae365eSSteven Rostedt do { 126577ae365eSSteven Rostedt if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { 126677ae365eSSteven Rostedt cpu_buffer->head_page = page; 126777ae365eSSteven Rostedt return page; 126877ae365eSSteven Rostedt } 126977ae365eSSteven Rostedt rb_inc_page(cpu_buffer, &page); 127077ae365eSSteven Rostedt } while (page != head); 127177ae365eSSteven Rostedt } 127277ae365eSSteven Rostedt 127377ae365eSSteven Rostedt RB_WARN_ON(cpu_buffer, 1); 127477ae365eSSteven Rostedt 127577ae365eSSteven Rostedt return NULL; 127677ae365eSSteven Rostedt } 127777ae365eSSteven Rostedt 127877ae365eSSteven Rostedt static int rb_head_page_replace(struct buffer_page *old, 127977ae365eSSteven Rostedt struct buffer_page *new) 128077ae365eSSteven Rostedt { 128177ae365eSSteven Rostedt unsigned long *ptr = (unsigned long *)&old->list.prev->next; 128277ae365eSSteven Rostedt unsigned long val; 128377ae365eSSteven Rostedt unsigned long ret; 128477ae365eSSteven Rostedt 128577ae365eSSteven Rostedt val = *ptr & ~RB_FLAG_MASK; 128677ae365eSSteven Rostedt val |= RB_PAGE_HEAD; 128777ae365eSSteven Rostedt 128808a40816SSteven Rostedt ret = cmpxchg(ptr, val, (unsigned long)&new->list); 128977ae365eSSteven Rostedt 129077ae365eSSteven Rostedt return ret == val; 129177ae365eSSteven Rostedt } 129277ae365eSSteven Rostedt 129377ae365eSSteven Rostedt /* 129477ae365eSSteven Rostedt * rb_tail_page_update - move the tail page forward 129577ae365eSSteven Rostedt */ 129670004986SSteven Rostedt (Red Hat) static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, 129777ae365eSSteven Rostedt struct buffer_page *tail_page, 129877ae365eSSteven Rostedt struct buffer_page *next_page) 129977ae365eSSteven Rostedt { 130077ae365eSSteven Rostedt unsigned long old_entries; 130177ae365eSSteven Rostedt unsigned long old_write; 130277ae365eSSteven Rostedt 130377ae365eSSteven Rostedt /* 130477ae365eSSteven Rostedt * The tail page now needs to be moved forward. 130577ae365eSSteven Rostedt * 130677ae365eSSteven Rostedt * We need to reset the tail page, but without messing 130777ae365eSSteven Rostedt * with possible erasing of data brought in by interrupts 130877ae365eSSteven Rostedt * that have moved the tail page and are currently on it. 130977ae365eSSteven Rostedt * 131077ae365eSSteven Rostedt * We add a counter to the write field to denote this. 131177ae365eSSteven Rostedt */ 131277ae365eSSteven Rostedt old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); 131377ae365eSSteven Rostedt old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); 131477ae365eSSteven Rostedt 13152c2b0a78SSteven Rostedt (VMware) local_inc(&cpu_buffer->pages_touched); 131677ae365eSSteven Rostedt /* 131777ae365eSSteven Rostedt * Just make sure we have seen our old_write and synchronize 131877ae365eSSteven Rostedt * with any interrupts that come in. 131977ae365eSSteven Rostedt */ 132077ae365eSSteven Rostedt barrier(); 132177ae365eSSteven Rostedt 132277ae365eSSteven Rostedt /* 132377ae365eSSteven Rostedt * If the tail page is still the same as what we think 132477ae365eSSteven Rostedt * it is, then it is up to us to update the tail 132577ae365eSSteven Rostedt * pointer. 132677ae365eSSteven Rostedt */ 13278573636eSSteven Rostedt (Red Hat) if (tail_page == READ_ONCE(cpu_buffer->tail_page)) { 132877ae365eSSteven Rostedt /* Zero the write counter */ 132977ae365eSSteven Rostedt unsigned long val = old_write & ~RB_WRITE_MASK; 133077ae365eSSteven Rostedt unsigned long eval = old_entries & ~RB_WRITE_MASK; 133177ae365eSSteven Rostedt 133277ae365eSSteven Rostedt /* 133377ae365eSSteven Rostedt * This will only succeed if an interrupt did 133477ae365eSSteven Rostedt * not come in and change it. In which case, we 133577ae365eSSteven Rostedt * do not want to modify it. 1336da706d8bSLai Jiangshan * 1337da706d8bSLai Jiangshan * We add (void) to let the compiler know that we do not care 1338da706d8bSLai Jiangshan * about the return value of these functions. We use the 1339da706d8bSLai Jiangshan * cmpxchg to only update if an interrupt did not already 1340da706d8bSLai Jiangshan * do it for us. If the cmpxchg fails, we don't care. 134177ae365eSSteven Rostedt */ 1342da706d8bSLai Jiangshan (void)local_cmpxchg(&next_page->write, old_write, val); 1343da706d8bSLai Jiangshan (void)local_cmpxchg(&next_page->entries, old_entries, eval); 134477ae365eSSteven Rostedt 134577ae365eSSteven Rostedt /* 134677ae365eSSteven Rostedt * No need to worry about races with clearing out the commit. 134777ae365eSSteven Rostedt * it only can increment when a commit takes place. But that 134877ae365eSSteven Rostedt * only happens in the outer most nested commit. 134977ae365eSSteven Rostedt */ 135077ae365eSSteven Rostedt local_set(&next_page->page->commit, 0); 135177ae365eSSteven Rostedt 135270004986SSteven Rostedt (Red Hat) /* Again, either we update tail_page or an interrupt does */ 135370004986SSteven Rostedt (Red Hat) (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); 135477ae365eSSteven Rostedt } 135577ae365eSSteven Rostedt } 135677ae365eSSteven Rostedt 135777ae365eSSteven Rostedt static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, 135877ae365eSSteven Rostedt struct buffer_page *bpage) 135977ae365eSSteven Rostedt { 136077ae365eSSteven Rostedt unsigned long val = (unsigned long)bpage; 136177ae365eSSteven Rostedt 136277ae365eSSteven Rostedt if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) 136377ae365eSSteven Rostedt return 1; 136477ae365eSSteven Rostedt 136577ae365eSSteven Rostedt return 0; 136677ae365eSSteven Rostedt } 136777ae365eSSteven Rostedt 136877ae365eSSteven Rostedt /** 136977ae365eSSteven Rostedt * rb_check_list - make sure a pointer to a list has the last bits zero 137077ae365eSSteven Rostedt */ 137177ae365eSSteven Rostedt static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, 137277ae365eSSteven Rostedt struct list_head *list) 137377ae365eSSteven Rostedt { 137477ae365eSSteven Rostedt if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev)) 137577ae365eSSteven Rostedt return 1; 137677ae365eSSteven Rostedt if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next)) 137777ae365eSSteven Rostedt return 1; 137877ae365eSSteven Rostedt return 0; 137977ae365eSSteven Rostedt } 138077ae365eSSteven Rostedt 13817a8e76a3SSteven Rostedt /** 1382d611851bSzhangwei(Jovi) * rb_check_pages - integrity check of buffer pages 13837a8e76a3SSteven Rostedt * @cpu_buffer: CPU buffer with pages to test 13847a8e76a3SSteven Rostedt * 1385c3706f00SWenji Huang * As a safety measure we check to make sure the data pages have not 13867a8e76a3SSteven Rostedt * been corrupted. 13877a8e76a3SSteven Rostedt */ 13887a8e76a3SSteven Rostedt static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 13897a8e76a3SSteven Rostedt { 13903adc54faSSteven Rostedt struct list_head *head = cpu_buffer->pages; 1391044fa782SSteven Rostedt struct buffer_page *bpage, *tmp; 13927a8e76a3SSteven Rostedt 1393308f7eebSSteven Rostedt /* Reset the head page if it exists */ 1394308f7eebSSteven Rostedt if (cpu_buffer->head_page) 1395308f7eebSSteven Rostedt rb_set_head_page(cpu_buffer); 1396308f7eebSSteven Rostedt 139777ae365eSSteven Rostedt rb_head_page_deactivate(cpu_buffer); 139877ae365eSSteven Rostedt 13993e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 14003e89c7bbSSteven Rostedt return -1; 14013e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) 14023e89c7bbSSteven Rostedt return -1; 14037a8e76a3SSteven Rostedt 140477ae365eSSteven Rostedt if (rb_check_list(cpu_buffer, head)) 140577ae365eSSteven Rostedt return -1; 140677ae365eSSteven Rostedt 1407044fa782SSteven Rostedt list_for_each_entry_safe(bpage, tmp, head, list) { 14083e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, 1409044fa782SSteven Rostedt bpage->list.next->prev != &bpage->list)) 14103e89c7bbSSteven Rostedt return -1; 14113e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, 1412044fa782SSteven Rostedt bpage->list.prev->next != &bpage->list)) 14133e89c7bbSSteven Rostedt return -1; 141477ae365eSSteven Rostedt if (rb_check_list(cpu_buffer, &bpage->list)) 141577ae365eSSteven Rostedt return -1; 14167a8e76a3SSteven Rostedt } 14177a8e76a3SSteven Rostedt 141877ae365eSSteven Rostedt rb_head_page_activate(cpu_buffer); 141977ae365eSSteven Rostedt 14207a8e76a3SSteven Rostedt return 0; 14217a8e76a3SSteven Rostedt } 14227a8e76a3SSteven Rostedt 14239b94a8fbSSteven Rostedt (Red Hat) static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) 14247a8e76a3SSteven Rostedt { 1425044fa782SSteven Rostedt struct buffer_page *bpage, *tmp; 1426927e56dbSSteven Rostedt (VMware) bool user_thread = current->mm != NULL; 1427927e56dbSSteven Rostedt (VMware) gfp_t mflags; 14289b94a8fbSSteven Rostedt (Red Hat) long i; 14293adc54faSSteven Rostedt 1430927e56dbSSteven Rostedt (VMware) /* 1431927e56dbSSteven Rostedt (VMware) * Check if the available memory is there first. 1432927e56dbSSteven Rostedt (VMware) * Note, si_mem_available() only gives us a rough estimate of available 1433927e56dbSSteven Rostedt (VMware) * memory. It may not be accurate. But we don't care, we just want 1434927e56dbSSteven Rostedt (VMware) * to prevent doing any allocation when it is obvious that it is 1435927e56dbSSteven Rostedt (VMware) * not going to succeed. 1436927e56dbSSteven Rostedt (VMware) */ 14372a872fa4SSteven Rostedt (VMware) i = si_mem_available(); 14382a872fa4SSteven Rostedt (VMware) if (i < nr_pages) 14392a872fa4SSteven Rostedt (VMware) return -ENOMEM; 14402a872fa4SSteven Rostedt (VMware) 1441d7ec4bfeSVaibhav Nagarnaik /* 144284861885SJoel Fernandes * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails 144384861885SJoel Fernandes * gracefully without invoking oom-killer and the system is not 144484861885SJoel Fernandes * destabilized. 1445d7ec4bfeSVaibhav Nagarnaik */ 1446927e56dbSSteven Rostedt (VMware) mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL; 1447927e56dbSSteven Rostedt (VMware) 1448927e56dbSSteven Rostedt (VMware) /* 1449927e56dbSSteven Rostedt (VMware) * If a user thread allocates too much, and si_mem_available() 1450927e56dbSSteven Rostedt (VMware) * reports there's enough memory, even though there is not. 1451927e56dbSSteven Rostedt (VMware) * Make sure the OOM killer kills this thread. This can happen 1452927e56dbSSteven Rostedt (VMware) * even with RETRY_MAYFAIL because another task may be doing 1453927e56dbSSteven Rostedt (VMware) * an allocation after this task has taken all memory. 1454927e56dbSSteven Rostedt (VMware) * This is the task the OOM killer needs to take out during this 1455927e56dbSSteven Rostedt (VMware) * loop, even if it was triggered by an allocation somewhere else. 1456927e56dbSSteven Rostedt (VMware) */ 1457927e56dbSSteven Rostedt (VMware) if (user_thread) 1458927e56dbSSteven Rostedt (VMware) set_current_oom_origin(); 1459927e56dbSSteven Rostedt (VMware) for (i = 0; i < nr_pages; i++) { 1460927e56dbSSteven Rostedt (VMware) struct page *page; 1461927e56dbSSteven Rostedt (VMware) 1462044fa782SSteven Rostedt bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1463927e56dbSSteven Rostedt (VMware) mflags, cpu_to_node(cpu)); 1464044fa782SSteven Rostedt if (!bpage) 1465e4c2ce82SSteven Rostedt goto free_pages; 146677ae365eSSteven Rostedt 1467438ced17SVaibhav Nagarnaik list_add(&bpage->list, pages); 146877ae365eSSteven Rostedt 1469927e56dbSSteven Rostedt (VMware) page = alloc_pages_node(cpu_to_node(cpu), mflags, 0); 14707ea59064SVaibhav Nagarnaik if (!page) 14717a8e76a3SSteven Rostedt goto free_pages; 14727ea59064SVaibhav Nagarnaik bpage->page = page_address(page); 1473044fa782SSteven Rostedt rb_init_page(bpage->page); 1474927e56dbSSteven Rostedt (VMware) 1475927e56dbSSteven Rostedt (VMware) if (user_thread && fatal_signal_pending(current)) 1476927e56dbSSteven Rostedt (VMware) goto free_pages; 14777a8e76a3SSteven Rostedt } 1478927e56dbSSteven Rostedt (VMware) if (user_thread) 1479927e56dbSSteven Rostedt (VMware) clear_current_oom_origin(); 14807a8e76a3SSteven Rostedt 1481438ced17SVaibhav Nagarnaik return 0; 1482438ced17SVaibhav Nagarnaik 1483438ced17SVaibhav Nagarnaik free_pages: 1484438ced17SVaibhav Nagarnaik list_for_each_entry_safe(bpage, tmp, pages, list) { 1485438ced17SVaibhav Nagarnaik list_del_init(&bpage->list); 1486438ced17SVaibhav Nagarnaik free_buffer_page(bpage); 1487438ced17SVaibhav Nagarnaik } 1488927e56dbSSteven Rostedt (VMware) if (user_thread) 1489927e56dbSSteven Rostedt (VMware) clear_current_oom_origin(); 1490438ced17SVaibhav Nagarnaik 1491438ced17SVaibhav Nagarnaik return -ENOMEM; 1492438ced17SVaibhav Nagarnaik } 1493438ced17SVaibhav Nagarnaik 1494438ced17SVaibhav Nagarnaik static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 14959b94a8fbSSteven Rostedt (Red Hat) unsigned long nr_pages) 1496438ced17SVaibhav Nagarnaik { 1497438ced17SVaibhav Nagarnaik LIST_HEAD(pages); 1498438ced17SVaibhav Nagarnaik 1499438ced17SVaibhav Nagarnaik WARN_ON(!nr_pages); 1500438ced17SVaibhav Nagarnaik 1501438ced17SVaibhav Nagarnaik if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu)) 1502438ced17SVaibhav Nagarnaik return -ENOMEM; 1503438ced17SVaibhav Nagarnaik 15043adc54faSSteven Rostedt /* 15053adc54faSSteven Rostedt * The ring buffer page list is a circular list that does not 15063adc54faSSteven Rostedt * start and end with a list head. All page list items point to 15073adc54faSSteven Rostedt * other pages. 15083adc54faSSteven Rostedt */ 15093adc54faSSteven Rostedt cpu_buffer->pages = pages.next; 15103adc54faSSteven Rostedt list_del(&pages); 15117a8e76a3SSteven Rostedt 1512438ced17SVaibhav Nagarnaik cpu_buffer->nr_pages = nr_pages; 1513438ced17SVaibhav Nagarnaik 15147a8e76a3SSteven Rostedt rb_check_pages(cpu_buffer); 15157a8e76a3SSteven Rostedt 15167a8e76a3SSteven Rostedt return 0; 15177a8e76a3SSteven Rostedt } 15187a8e76a3SSteven Rostedt 15197a8e76a3SSteven Rostedt static struct ring_buffer_per_cpu * 152013292494SSteven Rostedt (VMware) rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) 15217a8e76a3SSteven Rostedt { 15227a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 1523044fa782SSteven Rostedt struct buffer_page *bpage; 15247ea59064SVaibhav Nagarnaik struct page *page; 15257a8e76a3SSteven Rostedt int ret; 15267a8e76a3SSteven Rostedt 15277a8e76a3SSteven Rostedt cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), 15287a8e76a3SSteven Rostedt GFP_KERNEL, cpu_to_node(cpu)); 15297a8e76a3SSteven Rostedt if (!cpu_buffer) 15307a8e76a3SSteven Rostedt return NULL; 15317a8e76a3SSteven Rostedt 15327a8e76a3SSteven Rostedt cpu_buffer->cpu = cpu; 15337a8e76a3SSteven Rostedt cpu_buffer->buffer = buffer; 15345389f6faSThomas Gleixner raw_spin_lock_init(&cpu_buffer->reader_lock); 15351f8a6a10SPeter Zijlstra lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1536edc35bd7SThomas Gleixner cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 153783f40318SVaibhav Nagarnaik INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); 153805fdd70dSVaibhav Nagarnaik init_completion(&cpu_buffer->update_done); 153915693458SSteven Rostedt (Red Hat) init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); 1540f1dc6725SSteven Rostedt (Red Hat) init_waitqueue_head(&cpu_buffer->irq_work.waiters); 15411e0d6714SSteven Rostedt (Red Hat) init_waitqueue_head(&cpu_buffer->irq_work.full_waiters); 15427a8e76a3SSteven Rostedt 1543044fa782SSteven Rostedt bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1544e4c2ce82SSteven Rostedt GFP_KERNEL, cpu_to_node(cpu)); 1545044fa782SSteven Rostedt if (!bpage) 1546e4c2ce82SSteven Rostedt goto fail_free_buffer; 1547e4c2ce82SSteven Rostedt 154877ae365eSSteven Rostedt rb_check_bpage(cpu_buffer, bpage); 154977ae365eSSteven Rostedt 1550044fa782SSteven Rostedt cpu_buffer->reader_page = bpage; 15517ea59064SVaibhav Nagarnaik page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); 15527ea59064SVaibhav Nagarnaik if (!page) 1553e4c2ce82SSteven Rostedt goto fail_free_reader; 15547ea59064SVaibhav Nagarnaik bpage->page = page_address(page); 1555044fa782SSteven Rostedt rb_init_page(bpage->page); 1556e4c2ce82SSteven Rostedt 1557d769041fSSteven Rostedt INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 155844b99462SVaibhav Nagarnaik INIT_LIST_HEAD(&cpu_buffer->new_pages); 1559d769041fSSteven Rostedt 1560438ced17SVaibhav Nagarnaik ret = rb_allocate_pages(cpu_buffer, nr_pages); 15617a8e76a3SSteven Rostedt if (ret < 0) 1562d769041fSSteven Rostedt goto fail_free_reader; 15637a8e76a3SSteven Rostedt 15647a8e76a3SSteven Rostedt cpu_buffer->head_page 15653adc54faSSteven Rostedt = list_entry(cpu_buffer->pages, struct buffer_page, list); 1566bf41a158SSteven Rostedt cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 15677a8e76a3SSteven Rostedt 156877ae365eSSteven Rostedt rb_head_page_activate(cpu_buffer); 156977ae365eSSteven Rostedt 15707a8e76a3SSteven Rostedt return cpu_buffer; 15717a8e76a3SSteven Rostedt 1572d769041fSSteven Rostedt fail_free_reader: 1573d769041fSSteven Rostedt free_buffer_page(cpu_buffer->reader_page); 1574d769041fSSteven Rostedt 15757a8e76a3SSteven Rostedt fail_free_buffer: 15767a8e76a3SSteven Rostedt kfree(cpu_buffer); 15777a8e76a3SSteven Rostedt return NULL; 15787a8e76a3SSteven Rostedt } 15797a8e76a3SSteven Rostedt 15807a8e76a3SSteven Rostedt static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 15817a8e76a3SSteven Rostedt { 15823adc54faSSteven Rostedt struct list_head *head = cpu_buffer->pages; 1583044fa782SSteven Rostedt struct buffer_page *bpage, *tmp; 15847a8e76a3SSteven Rostedt 1585d769041fSSteven Rostedt free_buffer_page(cpu_buffer->reader_page); 1586d769041fSSteven Rostedt 158777ae365eSSteven Rostedt rb_head_page_deactivate(cpu_buffer); 158877ae365eSSteven Rostedt 15893adc54faSSteven Rostedt if (head) { 1590044fa782SSteven Rostedt list_for_each_entry_safe(bpage, tmp, head, list) { 1591044fa782SSteven Rostedt list_del_init(&bpage->list); 1592044fa782SSteven Rostedt free_buffer_page(bpage); 15937a8e76a3SSteven Rostedt } 15943adc54faSSteven Rostedt bpage = list_entry(head, struct buffer_page, list); 15953adc54faSSteven Rostedt free_buffer_page(bpage); 15963adc54faSSteven Rostedt } 15973adc54faSSteven Rostedt 15987a8e76a3SSteven Rostedt kfree(cpu_buffer); 15997a8e76a3SSteven Rostedt } 16007a8e76a3SSteven Rostedt 16017a8e76a3SSteven Rostedt /** 1602d611851bSzhangwei(Jovi) * __ring_buffer_alloc - allocate a new ring_buffer 160368814b58SRobert Richter * @size: the size in bytes per cpu that is needed. 16047a8e76a3SSteven Rostedt * @flags: attributes to set for the ring buffer. 160559e7cffeSFabian Frederick * @key: ring buffer reader_lock_key. 16067a8e76a3SSteven Rostedt * 16077a8e76a3SSteven Rostedt * Currently the only flag that is available is the RB_FL_OVERWRITE 16087a8e76a3SSteven Rostedt * flag. This flag means that the buffer will overwrite old data 16097a8e76a3SSteven Rostedt * when the buffer wraps. If this flag is not set, the buffer will 16107a8e76a3SSteven Rostedt * drop data when the tail hits the head. 16117a8e76a3SSteven Rostedt */ 161213292494SSteven Rostedt (VMware) struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, 16131f8a6a10SPeter Zijlstra struct lock_class_key *key) 16147a8e76a3SSteven Rostedt { 161513292494SSteven Rostedt (VMware) struct trace_buffer *buffer; 16169b94a8fbSSteven Rostedt (Red Hat) long nr_pages; 16177a8e76a3SSteven Rostedt int bsize; 16189b94a8fbSSteven Rostedt (Red Hat) int cpu; 1619b32614c0SSebastian Andrzej Siewior int ret; 16207a8e76a3SSteven Rostedt 16217a8e76a3SSteven Rostedt /* keep it in its own cache line */ 16227a8e76a3SSteven Rostedt buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 16237a8e76a3SSteven Rostedt GFP_KERNEL); 16247a8e76a3SSteven Rostedt if (!buffer) 16257a8e76a3SSteven Rostedt return NULL; 16267a8e76a3SSteven Rostedt 1627b18cc3deSSebastian Andrzej Siewior if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) 16289e01c1b7SRusty Russell goto fail_free_buffer; 16299e01c1b7SRusty Russell 1630438ced17SVaibhav Nagarnaik nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 16317a8e76a3SSteven Rostedt buffer->flags = flags; 163237886f6aSSteven Rostedt buffer->clock = trace_clock_local; 16331f8a6a10SPeter Zijlstra buffer->reader_lock_key = key; 16347a8e76a3SSteven Rostedt 163515693458SSteven Rostedt (Red Hat) init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters); 1636f1dc6725SSteven Rostedt (Red Hat) init_waitqueue_head(&buffer->irq_work.waiters); 163715693458SSteven Rostedt (Red Hat) 16387a8e76a3SSteven Rostedt /* need at least two pages */ 1639438ced17SVaibhav Nagarnaik if (nr_pages < 2) 1640438ced17SVaibhav Nagarnaik nr_pages = 2; 16417a8e76a3SSteven Rostedt 16427a8e76a3SSteven Rostedt buffer->cpus = nr_cpu_ids; 16437a8e76a3SSteven Rostedt 16447a8e76a3SSteven Rostedt bsize = sizeof(void *) * nr_cpu_ids; 16457a8e76a3SSteven Rostedt buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), 16467a8e76a3SSteven Rostedt GFP_KERNEL); 16477a8e76a3SSteven Rostedt if (!buffer->buffers) 16489e01c1b7SRusty Russell goto fail_free_cpumask; 16497a8e76a3SSteven Rostedt 1650b32614c0SSebastian Andrzej Siewior cpu = raw_smp_processor_id(); 1651b32614c0SSebastian Andrzej Siewior cpumask_set_cpu(cpu, buffer->cpumask); 1652b32614c0SSebastian Andrzej Siewior buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu); 16537a8e76a3SSteven Rostedt if (!buffer->buffers[cpu]) 16547a8e76a3SSteven Rostedt goto fail_free_buffers; 16557a8e76a3SSteven Rostedt 1656b32614c0SSebastian Andrzej Siewior ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); 1657b32614c0SSebastian Andrzej Siewior if (ret < 0) 1658b32614c0SSebastian Andrzej Siewior goto fail_free_buffers; 1659554f786eSSteven Rostedt 16607a8e76a3SSteven Rostedt mutex_init(&buffer->mutex); 16617a8e76a3SSteven Rostedt 16627a8e76a3SSteven Rostedt return buffer; 16637a8e76a3SSteven Rostedt 16647a8e76a3SSteven Rostedt fail_free_buffers: 16657a8e76a3SSteven Rostedt for_each_buffer_cpu(buffer, cpu) { 16667a8e76a3SSteven Rostedt if (buffer->buffers[cpu]) 16677a8e76a3SSteven Rostedt rb_free_cpu_buffer(buffer->buffers[cpu]); 16687a8e76a3SSteven Rostedt } 16697a8e76a3SSteven Rostedt kfree(buffer->buffers); 16707a8e76a3SSteven Rostedt 16719e01c1b7SRusty Russell fail_free_cpumask: 16729e01c1b7SRusty Russell free_cpumask_var(buffer->cpumask); 16739e01c1b7SRusty Russell 16747a8e76a3SSteven Rostedt fail_free_buffer: 16757a8e76a3SSteven Rostedt kfree(buffer); 16767a8e76a3SSteven Rostedt return NULL; 16777a8e76a3SSteven Rostedt } 16781f8a6a10SPeter Zijlstra EXPORT_SYMBOL_GPL(__ring_buffer_alloc); 16797a8e76a3SSteven Rostedt 16807a8e76a3SSteven Rostedt /** 16817a8e76a3SSteven Rostedt * ring_buffer_free - free a ring buffer. 16827a8e76a3SSteven Rostedt * @buffer: the buffer to free. 16837a8e76a3SSteven Rostedt */ 16847a8e76a3SSteven Rostedt void 168513292494SSteven Rostedt (VMware) ring_buffer_free(struct trace_buffer *buffer) 16867a8e76a3SSteven Rostedt { 16877a8e76a3SSteven Rostedt int cpu; 16887a8e76a3SSteven Rostedt 1689b32614c0SSebastian Andrzej Siewior cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); 1690554f786eSSteven Rostedt 16917a8e76a3SSteven Rostedt for_each_buffer_cpu(buffer, cpu) 16927a8e76a3SSteven Rostedt rb_free_cpu_buffer(buffer->buffers[cpu]); 16937a8e76a3SSteven Rostedt 1694bd3f0221SEric Dumazet kfree(buffer->buffers); 16959e01c1b7SRusty Russell free_cpumask_var(buffer->cpumask); 16969e01c1b7SRusty Russell 16977a8e76a3SSteven Rostedt kfree(buffer); 16987a8e76a3SSteven Rostedt } 1699c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_free); 17007a8e76a3SSteven Rostedt 170113292494SSteven Rostedt (VMware) void ring_buffer_set_clock(struct trace_buffer *buffer, 170237886f6aSSteven Rostedt u64 (*clock)(void)) 170337886f6aSSteven Rostedt { 170437886f6aSSteven Rostedt buffer->clock = clock; 170537886f6aSSteven Rostedt } 170637886f6aSSteven Rostedt 170713292494SSteven Rostedt (VMware) void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs) 170800b41452STom Zanussi { 170900b41452STom Zanussi buffer->time_stamp_abs = abs; 171000b41452STom Zanussi } 171100b41452STom Zanussi 171213292494SSteven Rostedt (VMware) bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer) 171300b41452STom Zanussi { 171400b41452STom Zanussi return buffer->time_stamp_abs; 171500b41452STom Zanussi } 171600b41452STom Zanussi 17177a8e76a3SSteven Rostedt static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 17187a8e76a3SSteven Rostedt 171983f40318SVaibhav Nagarnaik static inline unsigned long rb_page_entries(struct buffer_page *bpage) 17207a8e76a3SSteven Rostedt { 172183f40318SVaibhav Nagarnaik return local_read(&bpage->entries) & RB_WRITE_MASK; 172283f40318SVaibhav Nagarnaik } 172383f40318SVaibhav Nagarnaik 172483f40318SVaibhav Nagarnaik static inline unsigned long rb_page_write(struct buffer_page *bpage) 172583f40318SVaibhav Nagarnaik { 172683f40318SVaibhav Nagarnaik return local_read(&bpage->write) & RB_WRITE_MASK; 172783f40318SVaibhav Nagarnaik } 172883f40318SVaibhav Nagarnaik 17295040b4b7SVaibhav Nagarnaik static int 17309b94a8fbSSteven Rostedt (Red Hat) rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) 173183f40318SVaibhav Nagarnaik { 173283f40318SVaibhav Nagarnaik struct list_head *tail_page, *to_remove, *next_page; 173383f40318SVaibhav Nagarnaik struct buffer_page *to_remove_page, *tmp_iter_page; 173483f40318SVaibhav Nagarnaik struct buffer_page *last_page, *first_page; 17359b94a8fbSSteven Rostedt (Red Hat) unsigned long nr_removed; 173683f40318SVaibhav Nagarnaik unsigned long head_bit; 173783f40318SVaibhav Nagarnaik int page_entries; 173883f40318SVaibhav Nagarnaik 173983f40318SVaibhav Nagarnaik head_bit = 0; 17407a8e76a3SSteven Rostedt 17415389f6faSThomas Gleixner raw_spin_lock_irq(&cpu_buffer->reader_lock); 174283f40318SVaibhav Nagarnaik atomic_inc(&cpu_buffer->record_disabled); 174383f40318SVaibhav Nagarnaik /* 174483f40318SVaibhav Nagarnaik * We don't race with the readers since we have acquired the reader 174583f40318SVaibhav Nagarnaik * lock. We also don't race with writers after disabling recording. 174683f40318SVaibhav Nagarnaik * This makes it easy to figure out the first and the last page to be 174783f40318SVaibhav Nagarnaik * removed from the list. We unlink all the pages in between including 174883f40318SVaibhav Nagarnaik * the first and last pages. This is done in a busy loop so that we 174983f40318SVaibhav Nagarnaik * lose the least number of traces. 175083f40318SVaibhav Nagarnaik * The pages are freed after we restart recording and unlock readers. 175183f40318SVaibhav Nagarnaik */ 175283f40318SVaibhav Nagarnaik tail_page = &cpu_buffer->tail_page->list; 175377ae365eSSteven Rostedt 175483f40318SVaibhav Nagarnaik /* 175583f40318SVaibhav Nagarnaik * tail page might be on reader page, we remove the next page 175683f40318SVaibhav Nagarnaik * from the ring buffer 175783f40318SVaibhav Nagarnaik */ 175883f40318SVaibhav Nagarnaik if (cpu_buffer->tail_page == cpu_buffer->reader_page) 175983f40318SVaibhav Nagarnaik tail_page = rb_list_head(tail_page->next); 176083f40318SVaibhav Nagarnaik to_remove = tail_page; 176183f40318SVaibhav Nagarnaik 176283f40318SVaibhav Nagarnaik /* start of pages to remove */ 176383f40318SVaibhav Nagarnaik first_page = list_entry(rb_list_head(to_remove->next), 176483f40318SVaibhav Nagarnaik struct buffer_page, list); 176583f40318SVaibhav Nagarnaik 176683f40318SVaibhav Nagarnaik for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) { 176783f40318SVaibhav Nagarnaik to_remove = rb_list_head(to_remove)->next; 176883f40318SVaibhav Nagarnaik head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; 17697a8e76a3SSteven Rostedt } 17707a8e76a3SSteven Rostedt 177183f40318SVaibhav Nagarnaik next_page = rb_list_head(to_remove)->next; 17727a8e76a3SSteven Rostedt 177383f40318SVaibhav Nagarnaik /* 177483f40318SVaibhav Nagarnaik * Now we remove all pages between tail_page and next_page. 177583f40318SVaibhav Nagarnaik * Make sure that we have head_bit value preserved for the 177683f40318SVaibhav Nagarnaik * next page 177783f40318SVaibhav Nagarnaik */ 177883f40318SVaibhav Nagarnaik tail_page->next = (struct list_head *)((unsigned long)next_page | 177983f40318SVaibhav Nagarnaik head_bit); 178083f40318SVaibhav Nagarnaik next_page = rb_list_head(next_page); 178183f40318SVaibhav Nagarnaik next_page->prev = tail_page; 178283f40318SVaibhav Nagarnaik 178383f40318SVaibhav Nagarnaik /* make sure pages points to a valid page in the ring buffer */ 178483f40318SVaibhav Nagarnaik cpu_buffer->pages = next_page; 178583f40318SVaibhav Nagarnaik 178683f40318SVaibhav Nagarnaik /* update head page */ 178783f40318SVaibhav Nagarnaik if (head_bit) 178883f40318SVaibhav Nagarnaik cpu_buffer->head_page = list_entry(next_page, 178983f40318SVaibhav Nagarnaik struct buffer_page, list); 179083f40318SVaibhav Nagarnaik 179183f40318SVaibhav Nagarnaik /* 179283f40318SVaibhav Nagarnaik * change read pointer to make sure any read iterators reset 179383f40318SVaibhav Nagarnaik * themselves 179483f40318SVaibhav Nagarnaik */ 179583f40318SVaibhav Nagarnaik cpu_buffer->read = 0; 179683f40318SVaibhav Nagarnaik 179783f40318SVaibhav Nagarnaik /* pages are removed, resume tracing and then free the pages */ 179883f40318SVaibhav Nagarnaik atomic_dec(&cpu_buffer->record_disabled); 17995389f6faSThomas Gleixner raw_spin_unlock_irq(&cpu_buffer->reader_lock); 180083f40318SVaibhav Nagarnaik 180183f40318SVaibhav Nagarnaik RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)); 180283f40318SVaibhav Nagarnaik 180383f40318SVaibhav Nagarnaik /* last buffer page to remove */ 180483f40318SVaibhav Nagarnaik last_page = list_entry(rb_list_head(to_remove), struct buffer_page, 180583f40318SVaibhav Nagarnaik list); 180683f40318SVaibhav Nagarnaik tmp_iter_page = first_page; 180783f40318SVaibhav Nagarnaik 180883f40318SVaibhav Nagarnaik do { 180983f36555SVaibhav Nagarnaik cond_resched(); 181083f36555SVaibhav Nagarnaik 181183f40318SVaibhav Nagarnaik to_remove_page = tmp_iter_page; 181283f40318SVaibhav Nagarnaik rb_inc_page(cpu_buffer, &tmp_iter_page); 181383f40318SVaibhav Nagarnaik 181483f40318SVaibhav Nagarnaik /* update the counters */ 181583f40318SVaibhav Nagarnaik page_entries = rb_page_entries(to_remove_page); 181683f40318SVaibhav Nagarnaik if (page_entries) { 181783f40318SVaibhav Nagarnaik /* 181883f40318SVaibhav Nagarnaik * If something was added to this page, it was full 181983f40318SVaibhav Nagarnaik * since it is not the tail page. So we deduct the 182083f40318SVaibhav Nagarnaik * bytes consumed in ring buffer from here. 182148fdc72fSVaibhav Nagarnaik * Increment overrun to account for the lost events. 182283f40318SVaibhav Nagarnaik */ 182348fdc72fSVaibhav Nagarnaik local_add(page_entries, &cpu_buffer->overrun); 182483f40318SVaibhav Nagarnaik local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); 182583f40318SVaibhav Nagarnaik } 182683f40318SVaibhav Nagarnaik 182783f40318SVaibhav Nagarnaik /* 182883f40318SVaibhav Nagarnaik * We have already removed references to this list item, just 182983f40318SVaibhav Nagarnaik * free up the buffer_page and its page 183083f40318SVaibhav Nagarnaik */ 183183f40318SVaibhav Nagarnaik free_buffer_page(to_remove_page); 183283f40318SVaibhav Nagarnaik nr_removed--; 183383f40318SVaibhav Nagarnaik 183483f40318SVaibhav Nagarnaik } while (to_remove_page != last_page); 183583f40318SVaibhav Nagarnaik 183683f40318SVaibhav Nagarnaik RB_WARN_ON(cpu_buffer, nr_removed); 18375040b4b7SVaibhav Nagarnaik 18385040b4b7SVaibhav Nagarnaik return nr_removed == 0; 18397a8e76a3SSteven Rostedt } 18407a8e76a3SSteven Rostedt 18415040b4b7SVaibhav Nagarnaik static int 18425040b4b7SVaibhav Nagarnaik rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) 18437a8e76a3SSteven Rostedt { 18445040b4b7SVaibhav Nagarnaik struct list_head *pages = &cpu_buffer->new_pages; 18455040b4b7SVaibhav Nagarnaik int retries, success; 18467a8e76a3SSteven Rostedt 18475389f6faSThomas Gleixner raw_spin_lock_irq(&cpu_buffer->reader_lock); 18485040b4b7SVaibhav Nagarnaik /* 18495040b4b7SVaibhav Nagarnaik * We are holding the reader lock, so the reader page won't be swapped 18505040b4b7SVaibhav Nagarnaik * in the ring buffer. Now we are racing with the writer trying to 18515040b4b7SVaibhav Nagarnaik * move head page and the tail page. 18525040b4b7SVaibhav Nagarnaik * We are going to adapt the reader page update process where: 18535040b4b7SVaibhav Nagarnaik * 1. We first splice the start and end of list of new pages between 18545040b4b7SVaibhav Nagarnaik * the head page and its previous page. 18555040b4b7SVaibhav Nagarnaik * 2. We cmpxchg the prev_page->next to point from head page to the 18565040b4b7SVaibhav Nagarnaik * start of new pages list. 18575040b4b7SVaibhav Nagarnaik * 3. Finally, we update the head->prev to the end of new list. 18585040b4b7SVaibhav Nagarnaik * 18595040b4b7SVaibhav Nagarnaik * We will try this process 10 times, to make sure that we don't keep 18605040b4b7SVaibhav Nagarnaik * spinning. 18615040b4b7SVaibhav Nagarnaik */ 18625040b4b7SVaibhav Nagarnaik retries = 10; 18635040b4b7SVaibhav Nagarnaik success = 0; 18645040b4b7SVaibhav Nagarnaik while (retries--) { 18655040b4b7SVaibhav Nagarnaik struct list_head *head_page, *prev_page, *r; 18665040b4b7SVaibhav Nagarnaik struct list_head *last_page, *first_page; 18675040b4b7SVaibhav Nagarnaik struct list_head *head_page_with_bit; 186877ae365eSSteven Rostedt 18695040b4b7SVaibhav Nagarnaik head_page = &rb_set_head_page(cpu_buffer)->list; 187054f7be5bSSteven Rostedt if (!head_page) 187154f7be5bSSteven Rostedt break; 18725040b4b7SVaibhav Nagarnaik prev_page = head_page->prev; 18735040b4b7SVaibhav Nagarnaik 18745040b4b7SVaibhav Nagarnaik first_page = pages->next; 18755040b4b7SVaibhav Nagarnaik last_page = pages->prev; 18765040b4b7SVaibhav Nagarnaik 18775040b4b7SVaibhav Nagarnaik head_page_with_bit = (struct list_head *) 18785040b4b7SVaibhav Nagarnaik ((unsigned long)head_page | RB_PAGE_HEAD); 18795040b4b7SVaibhav Nagarnaik 18805040b4b7SVaibhav Nagarnaik last_page->next = head_page_with_bit; 18815040b4b7SVaibhav Nagarnaik first_page->prev = prev_page; 18825040b4b7SVaibhav Nagarnaik 18835040b4b7SVaibhav Nagarnaik r = cmpxchg(&prev_page->next, head_page_with_bit, first_page); 18845040b4b7SVaibhav Nagarnaik 18855040b4b7SVaibhav Nagarnaik if (r == head_page_with_bit) { 18865040b4b7SVaibhav Nagarnaik /* 18875040b4b7SVaibhav Nagarnaik * yay, we replaced the page pointer to our new list, 18885040b4b7SVaibhav Nagarnaik * now, we just have to update to head page's prev 18895040b4b7SVaibhav Nagarnaik * pointer to point to end of list 18905040b4b7SVaibhav Nagarnaik */ 18915040b4b7SVaibhav Nagarnaik head_page->prev = last_page; 18925040b4b7SVaibhav Nagarnaik success = 1; 18935040b4b7SVaibhav Nagarnaik break; 18947a8e76a3SSteven Rostedt } 18955040b4b7SVaibhav Nagarnaik } 18967a8e76a3SSteven Rostedt 18975040b4b7SVaibhav Nagarnaik if (success) 18985040b4b7SVaibhav Nagarnaik INIT_LIST_HEAD(pages); 18995040b4b7SVaibhav Nagarnaik /* 19005040b4b7SVaibhav Nagarnaik * If we weren't successful in adding in new pages, warn and stop 19015040b4b7SVaibhav Nagarnaik * tracing 19025040b4b7SVaibhav Nagarnaik */ 19035040b4b7SVaibhav Nagarnaik RB_WARN_ON(cpu_buffer, !success); 19045389f6faSThomas Gleixner raw_spin_unlock_irq(&cpu_buffer->reader_lock); 19055040b4b7SVaibhav Nagarnaik 19065040b4b7SVaibhav Nagarnaik /* free pages if they weren't inserted */ 19075040b4b7SVaibhav Nagarnaik if (!success) { 19085040b4b7SVaibhav Nagarnaik struct buffer_page *bpage, *tmp; 19095040b4b7SVaibhav Nagarnaik list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, 19105040b4b7SVaibhav Nagarnaik list) { 19115040b4b7SVaibhav Nagarnaik list_del_init(&bpage->list); 19125040b4b7SVaibhav Nagarnaik free_buffer_page(bpage); 19135040b4b7SVaibhav Nagarnaik } 19145040b4b7SVaibhav Nagarnaik } 19155040b4b7SVaibhav Nagarnaik return success; 19167a8e76a3SSteven Rostedt } 19177a8e76a3SSteven Rostedt 191883f40318SVaibhav Nagarnaik static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer) 1919438ced17SVaibhav Nagarnaik { 19205040b4b7SVaibhav Nagarnaik int success; 192183f40318SVaibhav Nagarnaik 19225040b4b7SVaibhav Nagarnaik if (cpu_buffer->nr_pages_to_update > 0) 19235040b4b7SVaibhav Nagarnaik success = rb_insert_pages(cpu_buffer); 19245040b4b7SVaibhav Nagarnaik else 19255040b4b7SVaibhav Nagarnaik success = rb_remove_pages(cpu_buffer, 19265040b4b7SVaibhav Nagarnaik -cpu_buffer->nr_pages_to_update); 19275040b4b7SVaibhav Nagarnaik 19285040b4b7SVaibhav Nagarnaik if (success) 1929438ced17SVaibhav Nagarnaik cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update; 193083f40318SVaibhav Nagarnaik } 193183f40318SVaibhav Nagarnaik 193283f40318SVaibhav Nagarnaik static void update_pages_handler(struct work_struct *work) 193383f40318SVaibhav Nagarnaik { 193483f40318SVaibhav Nagarnaik struct ring_buffer_per_cpu *cpu_buffer = container_of(work, 193583f40318SVaibhav Nagarnaik struct ring_buffer_per_cpu, update_pages_work); 193683f40318SVaibhav Nagarnaik rb_update_pages(cpu_buffer); 193705fdd70dSVaibhav Nagarnaik complete(&cpu_buffer->update_done); 1938438ced17SVaibhav Nagarnaik } 1939438ced17SVaibhav Nagarnaik 19407a8e76a3SSteven Rostedt /** 19417a8e76a3SSteven Rostedt * ring_buffer_resize - resize the ring buffer 19427a8e76a3SSteven Rostedt * @buffer: the buffer to resize. 19437a8e76a3SSteven Rostedt * @size: the new size. 1944d611851bSzhangwei(Jovi) * @cpu_id: the cpu buffer to resize 19457a8e76a3SSteven Rostedt * 19467a8e76a3SSteven Rostedt * Minimum size is 2 * BUF_PAGE_SIZE. 19477a8e76a3SSteven Rostedt * 194883f40318SVaibhav Nagarnaik * Returns 0 on success and < 0 on failure. 19497a8e76a3SSteven Rostedt */ 195013292494SSteven Rostedt (VMware) int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, 1951438ced17SVaibhav Nagarnaik int cpu_id) 19527a8e76a3SSteven Rostedt { 19537a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 19549b94a8fbSSteven Rostedt (Red Hat) unsigned long nr_pages; 19550a1754b2SQiujun Huang int cpu, err; 19567a8e76a3SSteven Rostedt 1957ee51a1deSIngo Molnar /* 1958ee51a1deSIngo Molnar * Always succeed at resizing a non-existent buffer: 1959ee51a1deSIngo Molnar */ 1960ee51a1deSIngo Molnar if (!buffer) 19610a1754b2SQiujun Huang return 0; 1962ee51a1deSIngo Molnar 19636a31e1f1SSteven Rostedt /* Make sure the requested buffer exists */ 19646a31e1f1SSteven Rostedt if (cpu_id != RING_BUFFER_ALL_CPUS && 19656a31e1f1SSteven Rostedt !cpumask_test_cpu(cpu_id, buffer->cpumask)) 19660a1754b2SQiujun Huang return 0; 19676a31e1f1SSteven Rostedt 196859643d15SSteven Rostedt (Red Hat) nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 19697a8e76a3SSteven Rostedt 19707a8e76a3SSteven Rostedt /* we need a minimum of two pages */ 197159643d15SSteven Rostedt (Red Hat) if (nr_pages < 2) 197259643d15SSteven Rostedt (Red Hat) nr_pages = 2; 19737a8e76a3SSteven Rostedt 197459643d15SSteven Rostedt (Red Hat) size = nr_pages * BUF_PAGE_SIZE; 19757a8e76a3SSteven Rostedt 197607b8b10eSSteven Rostedt (VMware) /* prevent another thread from changing buffer sizes */ 197707b8b10eSSteven Rostedt (VMware) mutex_lock(&buffer->mutex); 197807b8b10eSSteven Rostedt (VMware) 197907b8b10eSSteven Rostedt (VMware) 198007b8b10eSSteven Rostedt (VMware) if (cpu_id == RING_BUFFER_ALL_CPUS) { 198183f40318SVaibhav Nagarnaik /* 198283f40318SVaibhav Nagarnaik * Don't succeed if resizing is disabled, as a reader might be 198383f40318SVaibhav Nagarnaik * manipulating the ring buffer and is expecting a sane state while 198483f40318SVaibhav Nagarnaik * this is true. 198583f40318SVaibhav Nagarnaik */ 198607b8b10eSSteven Rostedt (VMware) for_each_buffer_cpu(buffer, cpu) { 198707b8b10eSSteven Rostedt (VMware) cpu_buffer = buffer->buffers[cpu]; 198807b8b10eSSteven Rostedt (VMware) if (atomic_read(&cpu_buffer->resize_disabled)) { 198907b8b10eSSteven Rostedt (VMware) err = -EBUSY; 199007b8b10eSSteven Rostedt (VMware) goto out_err_unlock; 199107b8b10eSSteven Rostedt (VMware) } 199207b8b10eSSteven Rostedt (VMware) } 199383f40318SVaibhav Nagarnaik 1994438ced17SVaibhav Nagarnaik /* calculate the pages to update */ 19957a8e76a3SSteven Rostedt for_each_buffer_cpu(buffer, cpu) { 19967a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 1997438ced17SVaibhav Nagarnaik 1998438ced17SVaibhav Nagarnaik cpu_buffer->nr_pages_to_update = nr_pages - 1999438ced17SVaibhav Nagarnaik cpu_buffer->nr_pages; 2000438ced17SVaibhav Nagarnaik /* 2001438ced17SVaibhav Nagarnaik * nothing more to do for removing pages or no update 2002438ced17SVaibhav Nagarnaik */ 2003438ced17SVaibhav Nagarnaik if (cpu_buffer->nr_pages_to_update <= 0) 2004438ced17SVaibhav Nagarnaik continue; 2005438ced17SVaibhav Nagarnaik /* 2006438ced17SVaibhav Nagarnaik * to add pages, make sure all new pages can be 2007438ced17SVaibhav Nagarnaik * allocated without receiving ENOMEM 2008438ced17SVaibhav Nagarnaik */ 2009438ced17SVaibhav Nagarnaik INIT_LIST_HEAD(&cpu_buffer->new_pages); 2010438ced17SVaibhav Nagarnaik if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update, 201183f40318SVaibhav Nagarnaik &cpu_buffer->new_pages, cpu)) { 2012438ced17SVaibhav Nagarnaik /* not enough memory for new pages */ 201383f40318SVaibhav Nagarnaik err = -ENOMEM; 201483f40318SVaibhav Nagarnaik goto out_err; 201583f40318SVaibhav Nagarnaik } 201683f40318SVaibhav Nagarnaik } 201783f40318SVaibhav Nagarnaik 201883f40318SVaibhav Nagarnaik get_online_cpus(); 201983f40318SVaibhav Nagarnaik /* 202083f40318SVaibhav Nagarnaik * Fire off all the required work handlers 202105fdd70dSVaibhav Nagarnaik * We can't schedule on offline CPUs, but it's not necessary 202283f40318SVaibhav Nagarnaik * since we can change their buffer sizes without any race. 202383f40318SVaibhav Nagarnaik */ 202483f40318SVaibhav Nagarnaik for_each_buffer_cpu(buffer, cpu) { 202583f40318SVaibhav Nagarnaik cpu_buffer = buffer->buffers[cpu]; 202605fdd70dSVaibhav Nagarnaik if (!cpu_buffer->nr_pages_to_update) 202783f40318SVaibhav Nagarnaik continue; 202883f40318SVaibhav Nagarnaik 2029021c5b34SCorey Minyard /* Can't run something on an offline CPU. */ 2030021c5b34SCorey Minyard if (!cpu_online(cpu)) { 2031f5eb5588SSteven Rostedt (Red Hat) rb_update_pages(cpu_buffer); 2032f5eb5588SSteven Rostedt (Red Hat) cpu_buffer->nr_pages_to_update = 0; 2033f5eb5588SSteven Rostedt (Red Hat) } else { 203405fdd70dSVaibhav Nagarnaik schedule_work_on(cpu, 203505fdd70dSVaibhav Nagarnaik &cpu_buffer->update_pages_work); 2036f5eb5588SSteven Rostedt (Red Hat) } 20377a8e76a3SSteven Rostedt } 2038438ced17SVaibhav Nagarnaik 2039438ced17SVaibhav Nagarnaik /* wait for all the updates to complete */ 2040438ced17SVaibhav Nagarnaik for_each_buffer_cpu(buffer, cpu) { 2041438ced17SVaibhav Nagarnaik cpu_buffer = buffer->buffers[cpu]; 204205fdd70dSVaibhav Nagarnaik if (!cpu_buffer->nr_pages_to_update) 204383f40318SVaibhav Nagarnaik continue; 204483f40318SVaibhav Nagarnaik 204505fdd70dSVaibhav Nagarnaik if (cpu_online(cpu)) 204605fdd70dSVaibhav Nagarnaik wait_for_completion(&cpu_buffer->update_done); 204783f40318SVaibhav Nagarnaik cpu_buffer->nr_pages_to_update = 0; 2048438ced17SVaibhav Nagarnaik } 204983f40318SVaibhav Nagarnaik 205083f40318SVaibhav Nagarnaik put_online_cpus(); 2051438ced17SVaibhav Nagarnaik } else { 20526167c205SSteven Rostedt (VMware) /* Make sure this CPU has been initialized */ 20538e49f418SVaibhav Nagarnaik if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) 20548e49f418SVaibhav Nagarnaik goto out; 20558e49f418SVaibhav Nagarnaik 2056438ced17SVaibhav Nagarnaik cpu_buffer = buffer->buffers[cpu_id]; 205783f40318SVaibhav Nagarnaik 2058438ced17SVaibhav Nagarnaik if (nr_pages == cpu_buffer->nr_pages) 20597a8e76a3SSteven Rostedt goto out; 2060438ced17SVaibhav Nagarnaik 206107b8b10eSSteven Rostedt (VMware) /* 206207b8b10eSSteven Rostedt (VMware) * Don't succeed if resizing is disabled, as a reader might be 206307b8b10eSSteven Rostedt (VMware) * manipulating the ring buffer and is expecting a sane state while 206407b8b10eSSteven Rostedt (VMware) * this is true. 206507b8b10eSSteven Rostedt (VMware) */ 206607b8b10eSSteven Rostedt (VMware) if (atomic_read(&cpu_buffer->resize_disabled)) { 206707b8b10eSSteven Rostedt (VMware) err = -EBUSY; 206807b8b10eSSteven Rostedt (VMware) goto out_err_unlock; 206907b8b10eSSteven Rostedt (VMware) } 207007b8b10eSSteven Rostedt (VMware) 2071438ced17SVaibhav Nagarnaik cpu_buffer->nr_pages_to_update = nr_pages - 2072438ced17SVaibhav Nagarnaik cpu_buffer->nr_pages; 2073438ced17SVaibhav Nagarnaik 2074438ced17SVaibhav Nagarnaik INIT_LIST_HEAD(&cpu_buffer->new_pages); 2075438ced17SVaibhav Nagarnaik if (cpu_buffer->nr_pages_to_update > 0 && 2076438ced17SVaibhav Nagarnaik __rb_allocate_pages(cpu_buffer->nr_pages_to_update, 207783f40318SVaibhav Nagarnaik &cpu_buffer->new_pages, cpu_id)) { 207883f40318SVaibhav Nagarnaik err = -ENOMEM; 207983f40318SVaibhav Nagarnaik goto out_err; 208083f40318SVaibhav Nagarnaik } 2081438ced17SVaibhav Nagarnaik 208283f40318SVaibhav Nagarnaik get_online_cpus(); 208383f40318SVaibhav Nagarnaik 2084021c5b34SCorey Minyard /* Can't run something on an offline CPU. */ 2085021c5b34SCorey Minyard if (!cpu_online(cpu_id)) 2086f5eb5588SSteven Rostedt (Red Hat) rb_update_pages(cpu_buffer); 2087f5eb5588SSteven Rostedt (Red Hat) else { 208883f40318SVaibhav Nagarnaik schedule_work_on(cpu_id, 208983f40318SVaibhav Nagarnaik &cpu_buffer->update_pages_work); 209005fdd70dSVaibhav Nagarnaik wait_for_completion(&cpu_buffer->update_done); 2091f5eb5588SSteven Rostedt (Red Hat) } 209283f40318SVaibhav Nagarnaik 209383f40318SVaibhav Nagarnaik cpu_buffer->nr_pages_to_update = 0; 209405fdd70dSVaibhav Nagarnaik put_online_cpus(); 20957a8e76a3SSteven Rostedt } 20967a8e76a3SSteven Rostedt 20977a8e76a3SSteven Rostedt out: 2098659f451fSSteven Rostedt /* 2099659f451fSSteven Rostedt * The ring buffer resize can happen with the ring buffer 2100659f451fSSteven Rostedt * enabled, so that the update disturbs the tracing as little 2101659f451fSSteven Rostedt * as possible. But if the buffer is disabled, we do not need 2102659f451fSSteven Rostedt * to worry about that, and we can take the time to verify 2103659f451fSSteven Rostedt * that the buffer is not corrupt. 2104659f451fSSteven Rostedt */ 2105659f451fSSteven Rostedt if (atomic_read(&buffer->record_disabled)) { 2106659f451fSSteven Rostedt atomic_inc(&buffer->record_disabled); 2107659f451fSSteven Rostedt /* 2108659f451fSSteven Rostedt * Even though the buffer was disabled, we must make sure 2109659f451fSSteven Rostedt * that it is truly disabled before calling rb_check_pages. 2110659f451fSSteven Rostedt * There could have been a race between checking 2111659f451fSSteven Rostedt * record_disable and incrementing it. 2112659f451fSSteven Rostedt */ 211374401729SPaul E. McKenney synchronize_rcu(); 2114659f451fSSteven Rostedt for_each_buffer_cpu(buffer, cpu) { 2115659f451fSSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 2116659f451fSSteven Rostedt rb_check_pages(cpu_buffer); 2117659f451fSSteven Rostedt } 2118659f451fSSteven Rostedt atomic_dec(&buffer->record_disabled); 2119659f451fSSteven Rostedt } 2120659f451fSSteven Rostedt 21217a8e76a3SSteven Rostedt mutex_unlock(&buffer->mutex); 21220a1754b2SQiujun Huang return 0; 21237a8e76a3SSteven Rostedt 212483f40318SVaibhav Nagarnaik out_err: 2125438ced17SVaibhav Nagarnaik for_each_buffer_cpu(buffer, cpu) { 2126438ced17SVaibhav Nagarnaik struct buffer_page *bpage, *tmp; 212783f40318SVaibhav Nagarnaik 2128438ced17SVaibhav Nagarnaik cpu_buffer = buffer->buffers[cpu]; 2129438ced17SVaibhav Nagarnaik cpu_buffer->nr_pages_to_update = 0; 213083f40318SVaibhav Nagarnaik 2131438ced17SVaibhav Nagarnaik if (list_empty(&cpu_buffer->new_pages)) 2132438ced17SVaibhav Nagarnaik continue; 213383f40318SVaibhav Nagarnaik 2134438ced17SVaibhav Nagarnaik list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, 2135438ced17SVaibhav Nagarnaik list) { 2136044fa782SSteven Rostedt list_del_init(&bpage->list); 2137044fa782SSteven Rostedt free_buffer_page(bpage); 21387a8e76a3SSteven Rostedt } 2139438ced17SVaibhav Nagarnaik } 214007b8b10eSSteven Rostedt (VMware) out_err_unlock: 2141641d2f63SVegard Nossum mutex_unlock(&buffer->mutex); 214283f40318SVaibhav Nagarnaik return err; 21437a8e76a3SSteven Rostedt } 2144c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_resize); 21457a8e76a3SSteven Rostedt 214613292494SSteven Rostedt (VMware) void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val) 2147750912faSDavid Sharp { 2148750912faSDavid Sharp mutex_lock(&buffer->mutex); 2149750912faSDavid Sharp if (val) 2150750912faSDavid Sharp buffer->flags |= RB_FL_OVERWRITE; 2151750912faSDavid Sharp else 2152750912faSDavid Sharp buffer->flags &= ~RB_FL_OVERWRITE; 2153750912faSDavid Sharp mutex_unlock(&buffer->mutex); 2154750912faSDavid Sharp } 2155750912faSDavid Sharp EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); 2156750912faSDavid Sharp 21572289d567SSteven Rostedt (Red Hat) static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) 21587a8e76a3SSteven Rostedt { 2159044fa782SSteven Rostedt return bpage->page->data + index; 21607a8e76a3SSteven Rostedt } 21617a8e76a3SSteven Rostedt 21622289d567SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event * 2163d769041fSSteven Rostedt rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) 21647a8e76a3SSteven Rostedt { 21656f807acdSSteven Rostedt return __rb_page_index(cpu_buffer->reader_page, 21666f807acdSSteven Rostedt cpu_buffer->reader_page->read); 21676f807acdSSteven Rostedt } 21686f807acdSSteven Rostedt 21692289d567SSteven Rostedt (Red Hat) static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) 2170bf41a158SSteven Rostedt { 2171abc9b56dSSteven Rostedt return local_read(&bpage->page->commit); 2172bf41a158SSteven Rostedt } 2173bf41a158SSteven Rostedt 2174785888c5SSteven Rostedt (VMware) static struct ring_buffer_event * 2175785888c5SSteven Rostedt (VMware) rb_iter_head_event(struct ring_buffer_iter *iter) 2176785888c5SSteven Rostedt (VMware) { 2177785888c5SSteven Rostedt (VMware) struct ring_buffer_event *event; 2178785888c5SSteven Rostedt (VMware) struct buffer_page *iter_head_page = iter->head_page; 2179785888c5SSteven Rostedt (VMware) unsigned long commit; 2180785888c5SSteven Rostedt (VMware) unsigned length; 2181785888c5SSteven Rostedt (VMware) 2182153368ceSSteven Rostedt (VMware) if (iter->head != iter->next_event) 2183153368ceSSteven Rostedt (VMware) return iter->event; 2184153368ceSSteven Rostedt (VMware) 2185785888c5SSteven Rostedt (VMware) /* 2186785888c5SSteven Rostedt (VMware) * When the writer goes across pages, it issues a cmpxchg which 2187785888c5SSteven Rostedt (VMware) * is a mb(), which will synchronize with the rmb here. 2188785888c5SSteven Rostedt (VMware) * (see rb_tail_page_update() and __rb_reserve_next()) 2189785888c5SSteven Rostedt (VMware) */ 2190785888c5SSteven Rostedt (VMware) commit = rb_page_commit(iter_head_page); 2191785888c5SSteven Rostedt (VMware) smp_rmb(); 2192785888c5SSteven Rostedt (VMware) event = __rb_page_index(iter_head_page, iter->head); 2193785888c5SSteven Rostedt (VMware) length = rb_event_length(event); 2194785888c5SSteven Rostedt (VMware) 2195785888c5SSteven Rostedt (VMware) /* 2196785888c5SSteven Rostedt (VMware) * READ_ONCE() doesn't work on functions and we don't want the 2197785888c5SSteven Rostedt (VMware) * compiler doing any crazy optimizations with length. 2198785888c5SSteven Rostedt (VMware) */ 2199785888c5SSteven Rostedt (VMware) barrier(); 2200785888c5SSteven Rostedt (VMware) 2201785888c5SSteven Rostedt (VMware) if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE) 2202785888c5SSteven Rostedt (VMware) /* Writer corrupted the read? */ 2203785888c5SSteven Rostedt (VMware) goto reset; 2204785888c5SSteven Rostedt (VMware) 2205785888c5SSteven Rostedt (VMware) memcpy(iter->event, event, length); 2206785888c5SSteven Rostedt (VMware) /* 2207785888c5SSteven Rostedt (VMware) * If the page stamp is still the same after this rmb() then the 2208785888c5SSteven Rostedt (VMware) * event was safely copied without the writer entering the page. 2209785888c5SSteven Rostedt (VMware) */ 2210785888c5SSteven Rostedt (VMware) smp_rmb(); 2211785888c5SSteven Rostedt (VMware) 2212785888c5SSteven Rostedt (VMware) /* Make sure the page didn't change since we read this */ 2213785888c5SSteven Rostedt (VMware) if (iter->page_stamp != iter_head_page->page->time_stamp || 2214785888c5SSteven Rostedt (VMware) commit > rb_page_commit(iter_head_page)) 2215785888c5SSteven Rostedt (VMware) goto reset; 2216785888c5SSteven Rostedt (VMware) 2217785888c5SSteven Rostedt (VMware) iter->next_event = iter->head + length; 2218785888c5SSteven Rostedt (VMware) return iter->event; 2219785888c5SSteven Rostedt (VMware) reset: 2220785888c5SSteven Rostedt (VMware) /* Reset to the beginning */ 2221785888c5SSteven Rostedt (VMware) iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; 2222785888c5SSteven Rostedt (VMware) iter->head = 0; 2223785888c5SSteven Rostedt (VMware) iter->next_event = 0; 2224c9b7a4a7SSteven Rostedt (VMware) iter->missed_events = 1; 2225785888c5SSteven Rostedt (VMware) return NULL; 2226785888c5SSteven Rostedt (VMware) } 2227785888c5SSteven Rostedt (VMware) 222825985edcSLucas De Marchi /* Size is determined by what has been committed */ 22292289d567SSteven Rostedt (Red Hat) static __always_inline unsigned rb_page_size(struct buffer_page *bpage) 2230bf41a158SSteven Rostedt { 2231bf41a158SSteven Rostedt return rb_page_commit(bpage); 2232bf41a158SSteven Rostedt } 2233bf41a158SSteven Rostedt 22342289d567SSteven Rostedt (Red Hat) static __always_inline unsigned 2235bf41a158SSteven Rostedt rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) 2236bf41a158SSteven Rostedt { 2237bf41a158SSteven Rostedt return rb_page_commit(cpu_buffer->commit_page); 2238bf41a158SSteven Rostedt } 2239bf41a158SSteven Rostedt 22402289d567SSteven Rostedt (Red Hat) static __always_inline unsigned 2241bf41a158SSteven Rostedt rb_event_index(struct ring_buffer_event *event) 22427a8e76a3SSteven Rostedt { 2243bf41a158SSteven Rostedt unsigned long addr = (unsigned long)event; 2244bf41a158SSteven Rostedt 224522f470f8SSteven Rostedt return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; 22467a8e76a3SSteven Rostedt } 22477a8e76a3SSteven Rostedt 224834a148bfSAndrew Morton static void rb_inc_iter(struct ring_buffer_iter *iter) 2249d769041fSSteven Rostedt { 2250d769041fSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 2251d769041fSSteven Rostedt 2252d769041fSSteven Rostedt /* 2253d769041fSSteven Rostedt * The iterator could be on the reader page (it starts there). 2254d769041fSSteven Rostedt * But the head could have moved, since the reader was 2255d769041fSSteven Rostedt * found. Check for this case and assign the iterator 2256d769041fSSteven Rostedt * to the head page instead of next. 2257d769041fSSteven Rostedt */ 2258d769041fSSteven Rostedt if (iter->head_page == cpu_buffer->reader_page) 225977ae365eSSteven Rostedt iter->head_page = rb_set_head_page(cpu_buffer); 2260d769041fSSteven Rostedt else 2261d769041fSSteven Rostedt rb_inc_page(cpu_buffer, &iter->head_page); 2262d769041fSSteven Rostedt 226328e3fc56SSteven Rostedt (VMware) iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; 22647a8e76a3SSteven Rostedt iter->head = 0; 2265785888c5SSteven Rostedt (VMware) iter->next_event = 0; 22667a8e76a3SSteven Rostedt } 22677a8e76a3SSteven Rostedt 226877ae365eSSteven Rostedt /* 226977ae365eSSteven Rostedt * rb_handle_head_page - writer hit the head page 227077ae365eSSteven Rostedt * 227177ae365eSSteven Rostedt * Returns: +1 to retry page 227277ae365eSSteven Rostedt * 0 to continue 227377ae365eSSteven Rostedt * -1 on error 227477ae365eSSteven Rostedt */ 227577ae365eSSteven Rostedt static int 227677ae365eSSteven Rostedt rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, 227777ae365eSSteven Rostedt struct buffer_page *tail_page, 227877ae365eSSteven Rostedt struct buffer_page *next_page) 227977ae365eSSteven Rostedt { 228077ae365eSSteven Rostedt struct buffer_page *new_head; 228177ae365eSSteven Rostedt int entries; 228277ae365eSSteven Rostedt int type; 228377ae365eSSteven Rostedt int ret; 228477ae365eSSteven Rostedt 228577ae365eSSteven Rostedt entries = rb_page_entries(next_page); 228677ae365eSSteven Rostedt 228777ae365eSSteven Rostedt /* 228877ae365eSSteven Rostedt * The hard part is here. We need to move the head 228977ae365eSSteven Rostedt * forward, and protect against both readers on 229077ae365eSSteven Rostedt * other CPUs and writers coming in via interrupts. 229177ae365eSSteven Rostedt */ 229277ae365eSSteven Rostedt type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, 229377ae365eSSteven Rostedt RB_PAGE_HEAD); 229477ae365eSSteven Rostedt 229577ae365eSSteven Rostedt /* 229677ae365eSSteven Rostedt * type can be one of four: 229777ae365eSSteven Rostedt * NORMAL - an interrupt already moved it for us 229877ae365eSSteven Rostedt * HEAD - we are the first to get here. 229977ae365eSSteven Rostedt * UPDATE - we are the interrupt interrupting 230077ae365eSSteven Rostedt * a current move. 230177ae365eSSteven Rostedt * MOVED - a reader on another CPU moved the next 230277ae365eSSteven Rostedt * pointer to its reader page. Give up 230377ae365eSSteven Rostedt * and try again. 230477ae365eSSteven Rostedt */ 230577ae365eSSteven Rostedt 230677ae365eSSteven Rostedt switch (type) { 230777ae365eSSteven Rostedt case RB_PAGE_HEAD: 230877ae365eSSteven Rostedt /* 230977ae365eSSteven Rostedt * We changed the head to UPDATE, thus 231077ae365eSSteven Rostedt * it is our responsibility to update 231177ae365eSSteven Rostedt * the counters. 231277ae365eSSteven Rostedt */ 231377ae365eSSteven Rostedt local_add(entries, &cpu_buffer->overrun); 2314c64e148aSVaibhav Nagarnaik local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); 231577ae365eSSteven Rostedt 231677ae365eSSteven Rostedt /* 231777ae365eSSteven Rostedt * The entries will be zeroed out when we move the 231877ae365eSSteven Rostedt * tail page. 231977ae365eSSteven Rostedt */ 232077ae365eSSteven Rostedt 232177ae365eSSteven Rostedt /* still more to do */ 232277ae365eSSteven Rostedt break; 232377ae365eSSteven Rostedt 232477ae365eSSteven Rostedt case RB_PAGE_UPDATE: 232577ae365eSSteven Rostedt /* 232677ae365eSSteven Rostedt * This is an interrupt that interrupt the 232777ae365eSSteven Rostedt * previous update. Still more to do. 232877ae365eSSteven Rostedt */ 232977ae365eSSteven Rostedt break; 233077ae365eSSteven Rostedt case RB_PAGE_NORMAL: 233177ae365eSSteven Rostedt /* 233277ae365eSSteven Rostedt * An interrupt came in before the update 233377ae365eSSteven Rostedt * and processed this for us. 233477ae365eSSteven Rostedt * Nothing left to do. 233577ae365eSSteven Rostedt */ 233677ae365eSSteven Rostedt return 1; 233777ae365eSSteven Rostedt case RB_PAGE_MOVED: 233877ae365eSSteven Rostedt /* 233977ae365eSSteven Rostedt * The reader is on another CPU and just did 234077ae365eSSteven Rostedt * a swap with our next_page. 234177ae365eSSteven Rostedt * Try again. 234277ae365eSSteven Rostedt */ 234377ae365eSSteven Rostedt return 1; 234477ae365eSSteven Rostedt default: 234577ae365eSSteven Rostedt RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ 234677ae365eSSteven Rostedt return -1; 234777ae365eSSteven Rostedt } 234877ae365eSSteven Rostedt 234977ae365eSSteven Rostedt /* 235077ae365eSSteven Rostedt * Now that we are here, the old head pointer is 235177ae365eSSteven Rostedt * set to UPDATE. This will keep the reader from 235277ae365eSSteven Rostedt * swapping the head page with the reader page. 235377ae365eSSteven Rostedt * The reader (on another CPU) will spin till 235477ae365eSSteven Rostedt * we are finished. 235577ae365eSSteven Rostedt * 235677ae365eSSteven Rostedt * We just need to protect against interrupts 235777ae365eSSteven Rostedt * doing the job. We will set the next pointer 235877ae365eSSteven Rostedt * to HEAD. After that, we set the old pointer 235977ae365eSSteven Rostedt * to NORMAL, but only if it was HEAD before. 236077ae365eSSteven Rostedt * otherwise we are an interrupt, and only 236177ae365eSSteven Rostedt * want the outer most commit to reset it. 236277ae365eSSteven Rostedt */ 236377ae365eSSteven Rostedt new_head = next_page; 236477ae365eSSteven Rostedt rb_inc_page(cpu_buffer, &new_head); 236577ae365eSSteven Rostedt 236677ae365eSSteven Rostedt ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, 236777ae365eSSteven Rostedt RB_PAGE_NORMAL); 236877ae365eSSteven Rostedt 236977ae365eSSteven Rostedt /* 237077ae365eSSteven Rostedt * Valid returns are: 237177ae365eSSteven Rostedt * HEAD - an interrupt came in and already set it. 237277ae365eSSteven Rostedt * NORMAL - One of two things: 237377ae365eSSteven Rostedt * 1) We really set it. 237477ae365eSSteven Rostedt * 2) A bunch of interrupts came in and moved 237577ae365eSSteven Rostedt * the page forward again. 237677ae365eSSteven Rostedt */ 237777ae365eSSteven Rostedt switch (ret) { 237877ae365eSSteven Rostedt case RB_PAGE_HEAD: 237977ae365eSSteven Rostedt case RB_PAGE_NORMAL: 238077ae365eSSteven Rostedt /* OK */ 238177ae365eSSteven Rostedt break; 238277ae365eSSteven Rostedt default: 238377ae365eSSteven Rostedt RB_WARN_ON(cpu_buffer, 1); 238477ae365eSSteven Rostedt return -1; 238577ae365eSSteven Rostedt } 238677ae365eSSteven Rostedt 238777ae365eSSteven Rostedt /* 238877ae365eSSteven Rostedt * It is possible that an interrupt came in, 238977ae365eSSteven Rostedt * set the head up, then more interrupts came in 239077ae365eSSteven Rostedt * and moved it again. When we get back here, 239177ae365eSSteven Rostedt * the page would have been set to NORMAL but we 239277ae365eSSteven Rostedt * just set it back to HEAD. 239377ae365eSSteven Rostedt * 239477ae365eSSteven Rostedt * How do you detect this? Well, if that happened 239577ae365eSSteven Rostedt * the tail page would have moved. 239677ae365eSSteven Rostedt */ 239777ae365eSSteven Rostedt if (ret == RB_PAGE_NORMAL) { 23988573636eSSteven Rostedt (Red Hat) struct buffer_page *buffer_tail_page; 23998573636eSSteven Rostedt (Red Hat) 24008573636eSSteven Rostedt (Red Hat) buffer_tail_page = READ_ONCE(cpu_buffer->tail_page); 240177ae365eSSteven Rostedt /* 240277ae365eSSteven Rostedt * If the tail had moved passed next, then we need 240377ae365eSSteven Rostedt * to reset the pointer. 240477ae365eSSteven Rostedt */ 24058573636eSSteven Rostedt (Red Hat) if (buffer_tail_page != tail_page && 24068573636eSSteven Rostedt (Red Hat) buffer_tail_page != next_page) 240777ae365eSSteven Rostedt rb_head_page_set_normal(cpu_buffer, new_head, 240877ae365eSSteven Rostedt next_page, 240977ae365eSSteven Rostedt RB_PAGE_HEAD); 241077ae365eSSteven Rostedt } 241177ae365eSSteven Rostedt 241277ae365eSSteven Rostedt /* 241377ae365eSSteven Rostedt * If this was the outer most commit (the one that 241477ae365eSSteven Rostedt * changed the original pointer from HEAD to UPDATE), 241577ae365eSSteven Rostedt * then it is up to us to reset it to NORMAL. 241677ae365eSSteven Rostedt */ 241777ae365eSSteven Rostedt if (type == RB_PAGE_HEAD) { 241877ae365eSSteven Rostedt ret = rb_head_page_set_normal(cpu_buffer, next_page, 241977ae365eSSteven Rostedt tail_page, 242077ae365eSSteven Rostedt RB_PAGE_UPDATE); 242177ae365eSSteven Rostedt if (RB_WARN_ON(cpu_buffer, 242277ae365eSSteven Rostedt ret != RB_PAGE_UPDATE)) 242377ae365eSSteven Rostedt return -1; 242477ae365eSSteven Rostedt } 242577ae365eSSteven Rostedt 242677ae365eSSteven Rostedt return 0; 242777ae365eSSteven Rostedt } 242877ae365eSSteven Rostedt 2429c7b09308SSteven Rostedt static inline void 2430c7b09308SSteven Rostedt rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, 2431fcc742eaSSteven Rostedt (Red Hat) unsigned long tail, struct rb_event_info *info) 2432c7b09308SSteven Rostedt { 2433fcc742eaSSteven Rostedt (Red Hat) struct buffer_page *tail_page = info->tail_page; 2434c7b09308SSteven Rostedt struct ring_buffer_event *event; 2435fcc742eaSSteven Rostedt (Red Hat) unsigned long length = info->length; 2436c7b09308SSteven Rostedt 2437c7b09308SSteven Rostedt /* 2438c7b09308SSteven Rostedt * Only the event that crossed the page boundary 2439c7b09308SSteven Rostedt * must fill the old tail_page with padding. 2440c7b09308SSteven Rostedt */ 2441c7b09308SSteven Rostedt if (tail >= BUF_PAGE_SIZE) { 2442b3230c8bSSteven Rostedt /* 2443b3230c8bSSteven Rostedt * If the page was filled, then we still need 2444b3230c8bSSteven Rostedt * to update the real_end. Reset it to zero 2445b3230c8bSSteven Rostedt * and the reader will ignore it. 2446b3230c8bSSteven Rostedt */ 2447b3230c8bSSteven Rostedt if (tail == BUF_PAGE_SIZE) 2448b3230c8bSSteven Rostedt tail_page->real_end = 0; 2449b3230c8bSSteven Rostedt 2450c7b09308SSteven Rostedt local_sub(length, &tail_page->write); 2451c7b09308SSteven Rostedt return; 2452c7b09308SSteven Rostedt } 2453c7b09308SSteven Rostedt 2454c7b09308SSteven Rostedt event = __rb_page_index(tail_page, tail); 2455c7b09308SSteven Rostedt 2456c64e148aSVaibhav Nagarnaik /* account for padding bytes */ 2457c64e148aSVaibhav Nagarnaik local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); 2458c64e148aSVaibhav Nagarnaik 2459c7b09308SSteven Rostedt /* 2460ff0ff84aSSteven Rostedt * Save the original length to the meta data. 2461ff0ff84aSSteven Rostedt * This will be used by the reader to add lost event 2462ff0ff84aSSteven Rostedt * counter. 2463ff0ff84aSSteven Rostedt */ 2464ff0ff84aSSteven Rostedt tail_page->real_end = tail; 2465ff0ff84aSSteven Rostedt 2466ff0ff84aSSteven Rostedt /* 2467c7b09308SSteven Rostedt * If this event is bigger than the minimum size, then 2468c7b09308SSteven Rostedt * we need to be careful that we don't subtract the 2469c7b09308SSteven Rostedt * write counter enough to allow another writer to slip 2470c7b09308SSteven Rostedt * in on this page. 2471c7b09308SSteven Rostedt * We put in a discarded commit instead, to make sure 2472c7b09308SSteven Rostedt * that this space is not used again. 2473c7b09308SSteven Rostedt * 2474c7b09308SSteven Rostedt * If we are less than the minimum size, we don't need to 2475c7b09308SSteven Rostedt * worry about it. 2476c7b09308SSteven Rostedt */ 2477c7b09308SSteven Rostedt if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { 2478c7b09308SSteven Rostedt /* No room for any events */ 2479c7b09308SSteven Rostedt 2480c7b09308SSteven Rostedt /* Mark the rest of the page with padding */ 2481c7b09308SSteven Rostedt rb_event_set_padding(event); 2482c7b09308SSteven Rostedt 2483c7b09308SSteven Rostedt /* Set the write back to the previous setting */ 2484c7b09308SSteven Rostedt local_sub(length, &tail_page->write); 2485c7b09308SSteven Rostedt return; 2486c7b09308SSteven Rostedt } 2487c7b09308SSteven Rostedt 2488c7b09308SSteven Rostedt /* Put in a discarded event */ 2489c7b09308SSteven Rostedt event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; 2490c7b09308SSteven Rostedt event->type_len = RINGBUF_TYPE_PADDING; 2491c7b09308SSteven Rostedt /* time delta must be non zero */ 2492c7b09308SSteven Rostedt event->time_delta = 1; 2493c7b09308SSteven Rostedt 2494c7b09308SSteven Rostedt /* Set write to end of buffer */ 2495c7b09308SSteven Rostedt length = (tail + length) - BUF_PAGE_SIZE; 2496c7b09308SSteven Rostedt local_sub(length, &tail_page->write); 2497c7b09308SSteven Rostedt } 24986634ff26SSteven Rostedt 24994239c38fSSteven Rostedt (Red Hat) static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer); 25004239c38fSSteven Rostedt (Red Hat) 2501747e94aeSSteven Rostedt /* 2502747e94aeSSteven Rostedt * This is the slow path, force gcc not to inline it. 2503747e94aeSSteven Rostedt */ 2504747e94aeSSteven Rostedt static noinline struct ring_buffer_event * 25056634ff26SSteven Rostedt rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 2506fcc742eaSSteven Rostedt (Red Hat) unsigned long tail, struct rb_event_info *info) 25077a8e76a3SSteven Rostedt { 2508fcc742eaSSteven Rostedt (Red Hat) struct buffer_page *tail_page = info->tail_page; 25095a50e33cSSteven Rostedt struct buffer_page *commit_page = cpu_buffer->commit_page; 251013292494SSteven Rostedt (VMware) struct trace_buffer *buffer = cpu_buffer->buffer; 251177ae365eSSteven Rostedt struct buffer_page *next_page; 251277ae365eSSteven Rostedt int ret; 2513aa20ae84SSteven Rostedt 2514aa20ae84SSteven Rostedt next_page = tail_page; 25157a8e76a3SSteven Rostedt 25167a8e76a3SSteven Rostedt rb_inc_page(cpu_buffer, &next_page); 25177a8e76a3SSteven Rostedt 2518bf41a158SSteven Rostedt /* 2519bf41a158SSteven Rostedt * If for some reason, we had an interrupt storm that made 2520bf41a158SSteven Rostedt * it all the way around the buffer, bail, and warn 2521bf41a158SSteven Rostedt * about it. 2522bf41a158SSteven Rostedt */ 252398db8df7SSteven Rostedt if (unlikely(next_page == commit_page)) { 252477ae365eSSteven Rostedt local_inc(&cpu_buffer->commit_overrun); 252545141d46SSteven Rostedt goto out_reset; 2526bf41a158SSteven Rostedt } 2527d769041fSSteven Rostedt 2528bf41a158SSteven Rostedt /* 252977ae365eSSteven Rostedt * This is where the fun begins! 253077ae365eSSteven Rostedt * 253177ae365eSSteven Rostedt * We are fighting against races between a reader that 253277ae365eSSteven Rostedt * could be on another CPU trying to swap its reader 253377ae365eSSteven Rostedt * page with the buffer head. 253477ae365eSSteven Rostedt * 253577ae365eSSteven Rostedt * We are also fighting against interrupts coming in and 253677ae365eSSteven Rostedt * moving the head or tail on us as well. 253777ae365eSSteven Rostedt * 253877ae365eSSteven Rostedt * If the next page is the head page then we have filled 253977ae365eSSteven Rostedt * the buffer, unless the commit page is still on the 254077ae365eSSteven Rostedt * reader page. 2541bf41a158SSteven Rostedt */ 254277ae365eSSteven Rostedt if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { 2543bf41a158SSteven Rostedt 254477ae365eSSteven Rostedt /* 254577ae365eSSteven Rostedt * If the commit is not on the reader page, then 254677ae365eSSteven Rostedt * move the header page. 254777ae365eSSteven Rostedt */ 254877ae365eSSteven Rostedt if (!rb_is_reader_page(cpu_buffer->commit_page)) { 254977ae365eSSteven Rostedt /* 255077ae365eSSteven Rostedt * If we are not in overwrite mode, 255177ae365eSSteven Rostedt * this is easy, just stop here. 255277ae365eSSteven Rostedt */ 2553884bfe89SSlava Pestov if (!(buffer->flags & RB_FL_OVERWRITE)) { 2554884bfe89SSlava Pestov local_inc(&cpu_buffer->dropped_events); 255577ae365eSSteven Rostedt goto out_reset; 2556884bfe89SSlava Pestov } 255777ae365eSSteven Rostedt 255877ae365eSSteven Rostedt ret = rb_handle_head_page(cpu_buffer, 255977ae365eSSteven Rostedt tail_page, 256077ae365eSSteven Rostedt next_page); 256177ae365eSSteven Rostedt if (ret < 0) 256277ae365eSSteven Rostedt goto out_reset; 256377ae365eSSteven Rostedt if (ret) 256477ae365eSSteven Rostedt goto out_again; 256577ae365eSSteven Rostedt } else { 256677ae365eSSteven Rostedt /* 256777ae365eSSteven Rostedt * We need to be careful here too. The 256877ae365eSSteven Rostedt * commit page could still be on the reader 256977ae365eSSteven Rostedt * page. We could have a small buffer, and 257077ae365eSSteven Rostedt * have filled up the buffer with events 257177ae365eSSteven Rostedt * from interrupts and such, and wrapped. 257277ae365eSSteven Rostedt * 257377ae365eSSteven Rostedt * Note, if the tail page is also the on the 257477ae365eSSteven Rostedt * reader_page, we let it move out. 257577ae365eSSteven Rostedt */ 257677ae365eSSteven Rostedt if (unlikely((cpu_buffer->commit_page != 257777ae365eSSteven Rostedt cpu_buffer->tail_page) && 257877ae365eSSteven Rostedt (cpu_buffer->commit_page == 257977ae365eSSteven Rostedt cpu_buffer->reader_page))) { 258077ae365eSSteven Rostedt local_inc(&cpu_buffer->commit_overrun); 258177ae365eSSteven Rostedt goto out_reset; 258277ae365eSSteven Rostedt } 258377ae365eSSteven Rostedt } 2584bf41a158SSteven Rostedt } 2585bf41a158SSteven Rostedt 258670004986SSteven Rostedt (Red Hat) rb_tail_page_update(cpu_buffer, tail_page, next_page); 25877a8e76a3SSteven Rostedt 258877ae365eSSteven Rostedt out_again: 258977ae365eSSteven Rostedt 2590fcc742eaSSteven Rostedt (Red Hat) rb_reset_tail(cpu_buffer, tail, info); 2591bf41a158SSteven Rostedt 25924239c38fSSteven Rostedt (Red Hat) /* Commit what we have for now. */ 25934239c38fSSteven Rostedt (Red Hat) rb_end_commit(cpu_buffer); 25944239c38fSSteven Rostedt (Red Hat) /* rb_end_commit() decs committing */ 25954239c38fSSteven Rostedt (Red Hat) local_inc(&cpu_buffer->committing); 25964239c38fSSteven Rostedt (Red Hat) 2597bf41a158SSteven Rostedt /* fail and let the caller try again */ 2598bf41a158SSteven Rostedt return ERR_PTR(-EAGAIN); 2599bf41a158SSteven Rostedt 260045141d46SSteven Rostedt out_reset: 26016f3b3440SLai Jiangshan /* reset write */ 2602fcc742eaSSteven Rostedt (Red Hat) rb_reset_tail(cpu_buffer, tail, info); 26036f3b3440SLai Jiangshan 2604bf41a158SSteven Rostedt return NULL; 26057a8e76a3SSteven Rostedt } 26067a8e76a3SSteven Rostedt 260774e87937SSteven Rostedt (VMware) /* Slow path */ 260874e87937SSteven Rostedt (VMware) static struct ring_buffer_event * 2609dc4e2801STom Zanussi rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) 2610d90fd774SSteven Rostedt (Red Hat) { 2611dc4e2801STom Zanussi if (abs) 2612dc4e2801STom Zanussi event->type_len = RINGBUF_TYPE_TIME_STAMP; 2613dc4e2801STom Zanussi else 2614d90fd774SSteven Rostedt (Red Hat) event->type_len = RINGBUF_TYPE_TIME_EXTEND; 2615d90fd774SSteven Rostedt (Red Hat) 2616dc4e2801STom Zanussi /* Not the first event on the page, or not delta? */ 2617dc4e2801STom Zanussi if (abs || rb_event_index(event)) { 2618d90fd774SSteven Rostedt (Red Hat) event->time_delta = delta & TS_MASK; 2619d90fd774SSteven Rostedt (Red Hat) event->array[0] = delta >> TS_SHIFT; 2620d90fd774SSteven Rostedt (Red Hat) } else { 2621d90fd774SSteven Rostedt (Red Hat) /* nope, just zero it */ 2622d90fd774SSteven Rostedt (Red Hat) event->time_delta = 0; 2623d90fd774SSteven Rostedt (Red Hat) event->array[0] = 0; 2624d90fd774SSteven Rostedt (Red Hat) } 2625d90fd774SSteven Rostedt (Red Hat) 2626d90fd774SSteven Rostedt (Red Hat) return skip_time_extend(event); 2627d90fd774SSteven Rostedt (Red Hat) } 2628d90fd774SSteven Rostedt (Red Hat) 2629cdb2a0a9SYaowei Bai static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 2630b7dc42fdSSteven Rostedt (Red Hat) struct ring_buffer_event *event); 2631b7dc42fdSSteven Rostedt (Red Hat) 263258fbc3c6SSteven Rostedt (VMware) #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 263358fbc3c6SSteven Rostedt (VMware) static inline bool sched_clock_stable(void) 263458fbc3c6SSteven Rostedt (VMware) { 263558fbc3c6SSteven Rostedt (VMware) return true; 263658fbc3c6SSteven Rostedt (VMware) } 263758fbc3c6SSteven Rostedt (VMware) #endif 263858fbc3c6SSteven Rostedt (VMware) 263974e87937SSteven Rostedt (VMware) static void 264058fbc3c6SSteven Rostedt (VMware) rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer, 264158fbc3c6SSteven Rostedt (VMware) struct rb_event_info *info) 264258fbc3c6SSteven Rostedt (VMware) { 264358fbc3c6SSteven Rostedt (VMware) u64 write_stamp; 264458fbc3c6SSteven Rostedt (VMware) 264529ce2451SSteven Rostedt (VMware) WARN_ONCE(1, "Delta way too big! %llu ts=%llu before=%llu after=%llu write stamp=%llu\n%s", 264658fbc3c6SSteven Rostedt (VMware) (unsigned long long)info->delta, 264758fbc3c6SSteven Rostedt (VMware) (unsigned long long)info->ts, 264858fbc3c6SSteven Rostedt (VMware) (unsigned long long)info->before, 264958fbc3c6SSteven Rostedt (VMware) (unsigned long long)info->after, 265058fbc3c6SSteven Rostedt (VMware) (unsigned long long)(rb_time_read(&cpu_buffer->write_stamp, &write_stamp) ? write_stamp : 0), 265158fbc3c6SSteven Rostedt (VMware) sched_clock_stable() ? "" : 265258fbc3c6SSteven Rostedt (VMware) "If you just came from a suspend/resume,\n" 265358fbc3c6SSteven Rostedt (VMware) "please switch to the trace global clock:\n" 265458fbc3c6SSteven Rostedt (VMware) " echo global > /sys/kernel/debug/tracing/trace_clock\n" 265558fbc3c6SSteven Rostedt (VMware) "or add trace_clock=global to the kernel command line\n"); 265658fbc3c6SSteven Rostedt (VMware) } 265758fbc3c6SSteven Rostedt (VMware) 265874e87937SSteven Rostedt (VMware) static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer, 265974e87937SSteven Rostedt (VMware) struct ring_buffer_event **event, 266074e87937SSteven Rostedt (VMware) struct rb_event_info *info, 266174e87937SSteven Rostedt (VMware) u64 *delta, 266274e87937SSteven Rostedt (VMware) unsigned int *length) 266374e87937SSteven Rostedt (VMware) { 266474e87937SSteven Rostedt (VMware) bool abs = info->add_timestamp & 266574e87937SSteven Rostedt (VMware) (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE); 266674e87937SSteven Rostedt (VMware) 266729ce2451SSteven Rostedt (VMware) if (unlikely(info->delta > (1ULL << 59))) { 266829ce2451SSteven Rostedt (VMware) /* did the clock go backwards */ 266929ce2451SSteven Rostedt (VMware) if (info->before == info->after && info->before > info->ts) { 267029ce2451SSteven Rostedt (VMware) /* not interrupted */ 267129ce2451SSteven Rostedt (VMware) static int once; 267229ce2451SSteven Rostedt (VMware) 267329ce2451SSteven Rostedt (VMware) /* 267429ce2451SSteven Rostedt (VMware) * This is possible with a recalibrating of the TSC. 267529ce2451SSteven Rostedt (VMware) * Do not produce a call stack, but just report it. 267629ce2451SSteven Rostedt (VMware) */ 267729ce2451SSteven Rostedt (VMware) if (!once) { 267829ce2451SSteven Rostedt (VMware) once++; 267929ce2451SSteven Rostedt (VMware) pr_warn("Ring buffer clock went backwards: %llu -> %llu\n", 268029ce2451SSteven Rostedt (VMware) info->before, info->ts); 268129ce2451SSteven Rostedt (VMware) } 268229ce2451SSteven Rostedt (VMware) } else 268374e87937SSteven Rostedt (VMware) rb_check_timestamp(cpu_buffer, info); 268429ce2451SSteven Rostedt (VMware) if (!abs) 268529ce2451SSteven Rostedt (VMware) info->delta = 0; 268629ce2451SSteven Rostedt (VMware) } 268774e87937SSteven Rostedt (VMware) *event = rb_add_time_stamp(*event, info->delta, abs); 268874e87937SSteven Rostedt (VMware) *length -= RB_LEN_TIME_EXTEND; 268974e87937SSteven Rostedt (VMware) *delta = 0; 269074e87937SSteven Rostedt (VMware) } 269174e87937SSteven Rostedt (VMware) 2692d90fd774SSteven Rostedt (Red Hat) /** 2693d90fd774SSteven Rostedt (Red Hat) * rb_update_event - update event type and data 2694cfc585a4SSteven Rostedt (VMware) * @cpu_buffer: The per cpu buffer of the @event 2695d90fd774SSteven Rostedt (Red Hat) * @event: the event to update 2696cfc585a4SSteven Rostedt (VMware) * @info: The info to update the @event with (contains length and delta) 2697d90fd774SSteven Rostedt (Red Hat) * 2698cfc585a4SSteven Rostedt (VMware) * Update the type and data fields of the @event. The length 2699d90fd774SSteven Rostedt (Red Hat) * is the actual size that is written to the ring buffer, 2700d90fd774SSteven Rostedt (Red Hat) * and with this, we can determine what to place into the 2701d90fd774SSteven Rostedt (Red Hat) * data field. 2702d90fd774SSteven Rostedt (Red Hat) */ 2703b7dc42fdSSteven Rostedt (Red Hat) static void 2704d90fd774SSteven Rostedt (Red Hat) rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, 2705d90fd774SSteven Rostedt (Red Hat) struct ring_buffer_event *event, 2706d90fd774SSteven Rostedt (Red Hat) struct rb_event_info *info) 2707d90fd774SSteven Rostedt (Red Hat) { 2708d90fd774SSteven Rostedt (Red Hat) unsigned length = info->length; 2709d90fd774SSteven Rostedt (Red Hat) u64 delta = info->delta; 2710d90fd774SSteven Rostedt (Red Hat) 2711d90fd774SSteven Rostedt (Red Hat) /* 2712d90fd774SSteven Rostedt (Red Hat) * If we need to add a timestamp, then we 27136167c205SSteven Rostedt (VMware) * add it to the start of the reserved space. 2714d90fd774SSteven Rostedt (Red Hat) */ 271574e87937SSteven Rostedt (VMware) if (unlikely(info->add_timestamp)) 271674e87937SSteven Rostedt (VMware) rb_add_timestamp(cpu_buffer, &event, info, &delta, &length); 2717d90fd774SSteven Rostedt (Red Hat) 2718d90fd774SSteven Rostedt (Red Hat) event->time_delta = delta; 2719d90fd774SSteven Rostedt (Red Hat) length -= RB_EVNT_HDR_SIZE; 272086b3de60SSteven Rostedt (VMware) if (length > RB_MAX_SMALL_DATA) { 2721d90fd774SSteven Rostedt (Red Hat) event->type_len = 0; 2722d90fd774SSteven Rostedt (Red Hat) event->array[0] = length; 2723d90fd774SSteven Rostedt (Red Hat) } else 2724d90fd774SSteven Rostedt (Red Hat) event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 2725d90fd774SSteven Rostedt (Red Hat) } 2726d90fd774SSteven Rostedt (Red Hat) 2727d90fd774SSteven Rostedt (Red Hat) static unsigned rb_calculate_event_length(unsigned length) 2728d90fd774SSteven Rostedt (Red Hat) { 2729d90fd774SSteven Rostedt (Red Hat) struct ring_buffer_event event; /* Used only for sizeof array */ 2730d90fd774SSteven Rostedt (Red Hat) 2731d90fd774SSteven Rostedt (Red Hat) /* zero length can cause confusions */ 2732d90fd774SSteven Rostedt (Red Hat) if (!length) 2733d90fd774SSteven Rostedt (Red Hat) length++; 2734d90fd774SSteven Rostedt (Red Hat) 273586b3de60SSteven Rostedt (VMware) if (length > RB_MAX_SMALL_DATA) 2736d90fd774SSteven Rostedt (Red Hat) length += sizeof(event.array[0]); 2737d90fd774SSteven Rostedt (Red Hat) 2738d90fd774SSteven Rostedt (Red Hat) length += RB_EVNT_HDR_SIZE; 273986b3de60SSteven Rostedt (VMware) length = ALIGN(length, RB_ALIGNMENT); 2740d90fd774SSteven Rostedt (Red Hat) 2741d90fd774SSteven Rostedt (Red Hat) /* 2742d90fd774SSteven Rostedt (Red Hat) * In case the time delta is larger than the 27 bits for it 2743d90fd774SSteven Rostedt (Red Hat) * in the header, we need to add a timestamp. If another 2744d90fd774SSteven Rostedt (Red Hat) * event comes in when trying to discard this one to increase 2745d90fd774SSteven Rostedt (Red Hat) * the length, then the timestamp will be added in the allocated 2746d90fd774SSteven Rostedt (Red Hat) * space of this event. If length is bigger than the size needed 2747d90fd774SSteven Rostedt (Red Hat) * for the TIME_EXTEND, then padding has to be used. The events 2748d90fd774SSteven Rostedt (Red Hat) * length must be either RB_LEN_TIME_EXTEND, or greater than or equal 2749d90fd774SSteven Rostedt (Red Hat) * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding. 2750d90fd774SSteven Rostedt (Red Hat) * As length is a multiple of 4, we only need to worry if it 2751d90fd774SSteven Rostedt (Red Hat) * is 12 (RB_LEN_TIME_EXTEND + 4). 2752d90fd774SSteven Rostedt (Red Hat) */ 2753d90fd774SSteven Rostedt (Red Hat) if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT) 2754d90fd774SSteven Rostedt (Red Hat) length += RB_ALIGNMENT; 2755d90fd774SSteven Rostedt (Red Hat) 2756d90fd774SSteven Rostedt (Red Hat) return length; 2757d90fd774SSteven Rostedt (Red Hat) } 2758d90fd774SSteven Rostedt (Red Hat) 2759a389d86fSSteven Rostedt (VMware) static __always_inline bool 2760a389d86fSSteven Rostedt (VMware) rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 2761a389d86fSSteven Rostedt (VMware) struct ring_buffer_event *event) 27629826b273SSteven Rostedt (Red Hat) { 2763a389d86fSSteven Rostedt (VMware) unsigned long addr = (unsigned long)event; 2764a389d86fSSteven Rostedt (VMware) unsigned long index; 2765a389d86fSSteven Rostedt (VMware) 2766a389d86fSSteven Rostedt (VMware) index = rb_event_index(event); 2767a389d86fSSteven Rostedt (VMware) addr &= PAGE_MASK; 2768a389d86fSSteven Rostedt (VMware) 2769a389d86fSSteven Rostedt (VMware) return cpu_buffer->commit_page->page == (void *)addr && 2770a389d86fSSteven Rostedt (VMware) rb_commit_index(cpu_buffer) == index; 27719826b273SSteven Rostedt (Red Hat) } 2772a389d86fSSteven Rostedt (VMware) 2773a389d86fSSteven Rostedt (VMware) static u64 rb_time_delta(struct ring_buffer_event *event) 2774a389d86fSSteven Rostedt (VMware) { 2775a389d86fSSteven Rostedt (VMware) switch (event->type_len) { 2776a389d86fSSteven Rostedt (VMware) case RINGBUF_TYPE_PADDING: 2777a389d86fSSteven Rostedt (VMware) return 0; 2778a389d86fSSteven Rostedt (VMware) 2779a389d86fSSteven Rostedt (VMware) case RINGBUF_TYPE_TIME_EXTEND: 2780a389d86fSSteven Rostedt (VMware) return ring_buffer_event_time_stamp(event); 2781a389d86fSSteven Rostedt (VMware) 2782a389d86fSSteven Rostedt (VMware) case RINGBUF_TYPE_TIME_STAMP: 2783a389d86fSSteven Rostedt (VMware) return 0; 2784a389d86fSSteven Rostedt (VMware) 2785a389d86fSSteven Rostedt (VMware) case RINGBUF_TYPE_DATA: 2786a389d86fSSteven Rostedt (VMware) return event->time_delta; 2787a389d86fSSteven Rostedt (VMware) default: 2788a389d86fSSteven Rostedt (VMware) return 0; 2789a389d86fSSteven Rostedt (VMware) } 2790a389d86fSSteven Rostedt (VMware) } 27919826b273SSteven Rostedt (Red Hat) 2792a4543a2fSSteven Rostedt (Red Hat) static inline int 2793a4543a2fSSteven Rostedt (Red Hat) rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, 2794d90fd774SSteven Rostedt (Red Hat) struct ring_buffer_event *event) 2795d90fd774SSteven Rostedt (Red Hat) { 2796d90fd774SSteven Rostedt (Red Hat) unsigned long new_index, old_index; 2797d90fd774SSteven Rostedt (Red Hat) struct buffer_page *bpage; 2798d90fd774SSteven Rostedt (Red Hat) unsigned long index; 2799d90fd774SSteven Rostedt (Red Hat) unsigned long addr; 2800a389d86fSSteven Rostedt (VMware) u64 write_stamp; 2801a389d86fSSteven Rostedt (VMware) u64 delta; 2802d90fd774SSteven Rostedt (Red Hat) 2803d90fd774SSteven Rostedt (Red Hat) new_index = rb_event_index(event); 2804d90fd774SSteven Rostedt (Red Hat) old_index = new_index + rb_event_ts_length(event); 2805d90fd774SSteven Rostedt (Red Hat) addr = (unsigned long)event; 2806d90fd774SSteven Rostedt (Red Hat) addr &= PAGE_MASK; 2807d90fd774SSteven Rostedt (Red Hat) 28088573636eSSteven Rostedt (Red Hat) bpage = READ_ONCE(cpu_buffer->tail_page); 2809d90fd774SSteven Rostedt (Red Hat) 2810a389d86fSSteven Rostedt (VMware) delta = rb_time_delta(event); 2811a389d86fSSteven Rostedt (VMware) 281210464b4aSSteven Rostedt (VMware) if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp)) 281310464b4aSSteven Rostedt (VMware) return 0; 2814a389d86fSSteven Rostedt (VMware) 2815a389d86fSSteven Rostedt (VMware) /* Make sure the write stamp is read before testing the location */ 2816a389d86fSSteven Rostedt (VMware) barrier(); 2817a389d86fSSteven Rostedt (VMware) 2818d90fd774SSteven Rostedt (Red Hat) if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { 2819d90fd774SSteven Rostedt (Red Hat) unsigned long write_mask = 2820d90fd774SSteven Rostedt (Red Hat) local_read(&bpage->write) & ~RB_WRITE_MASK; 2821d90fd774SSteven Rostedt (Red Hat) unsigned long event_length = rb_event_length(event); 2822a389d86fSSteven Rostedt (VMware) 2823a389d86fSSteven Rostedt (VMware) /* Something came in, can't discard */ 282410464b4aSSteven Rostedt (VMware) if (!rb_time_cmpxchg(&cpu_buffer->write_stamp, 282510464b4aSSteven Rostedt (VMware) write_stamp, write_stamp - delta)) 2826a389d86fSSteven Rostedt (VMware) return 0; 2827a389d86fSSteven Rostedt (VMware) 2828a389d86fSSteven Rostedt (VMware) /* 2829a389d86fSSteven Rostedt (VMware) * If an event were to come in now, it would see that the 2830a389d86fSSteven Rostedt (VMware) * write_stamp and the before_stamp are different, and assume 2831a389d86fSSteven Rostedt (VMware) * that this event just added itself before updating 2832a389d86fSSteven Rostedt (VMware) * the write stamp. The interrupting event will fix the 2833a389d86fSSteven Rostedt (VMware) * write stamp for us, and use the before stamp as its delta. 2834a389d86fSSteven Rostedt (VMware) */ 2835a389d86fSSteven Rostedt (VMware) 2836d90fd774SSteven Rostedt (Red Hat) /* 2837d90fd774SSteven Rostedt (Red Hat) * This is on the tail page. It is possible that 2838d90fd774SSteven Rostedt (Red Hat) * a write could come in and move the tail page 2839d90fd774SSteven Rostedt (Red Hat) * and write to the next page. That is fine 2840d90fd774SSteven Rostedt (Red Hat) * because we just shorten what is on this page. 2841d90fd774SSteven Rostedt (Red Hat) */ 2842d90fd774SSteven Rostedt (Red Hat) old_index += write_mask; 2843d90fd774SSteven Rostedt (Red Hat) new_index += write_mask; 2844d90fd774SSteven Rostedt (Red Hat) index = local_cmpxchg(&bpage->write, old_index, new_index); 2845d90fd774SSteven Rostedt (Red Hat) if (index == old_index) { 2846d90fd774SSteven Rostedt (Red Hat) /* update counters */ 2847d90fd774SSteven Rostedt (Red Hat) local_sub(event_length, &cpu_buffer->entries_bytes); 2848d90fd774SSteven Rostedt (Red Hat) return 1; 2849d90fd774SSteven Rostedt (Red Hat) } 2850d90fd774SSteven Rostedt (Red Hat) } 2851d90fd774SSteven Rostedt (Red Hat) 2852d90fd774SSteven Rostedt (Red Hat) /* could not discard */ 2853d90fd774SSteven Rostedt (Red Hat) return 0; 2854d90fd774SSteven Rostedt (Red Hat) } 2855d90fd774SSteven Rostedt (Red Hat) 2856d90fd774SSteven Rostedt (Red Hat) static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2857d90fd774SSteven Rostedt (Red Hat) { 2858d90fd774SSteven Rostedt (Red Hat) local_inc(&cpu_buffer->committing); 2859d90fd774SSteven Rostedt (Red Hat) local_inc(&cpu_buffer->commits); 2860d90fd774SSteven Rostedt (Red Hat) } 2861d90fd774SSteven Rostedt (Red Hat) 286238e11df1SSteven Rostedt (Red Hat) static __always_inline void 2863d90fd774SSteven Rostedt (Red Hat) rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 2864d90fd774SSteven Rostedt (Red Hat) { 2865d90fd774SSteven Rostedt (Red Hat) unsigned long max_count; 2866d90fd774SSteven Rostedt (Red Hat) 2867d90fd774SSteven Rostedt (Red Hat) /* 2868d90fd774SSteven Rostedt (Red Hat) * We only race with interrupts and NMIs on this CPU. 2869d90fd774SSteven Rostedt (Red Hat) * If we own the commit event, then we can commit 2870d90fd774SSteven Rostedt (Red Hat) * all others that interrupted us, since the interruptions 2871d90fd774SSteven Rostedt (Red Hat) * are in stack format (they finish before they come 2872d90fd774SSteven Rostedt (Red Hat) * back to us). This allows us to do a simple loop to 2873d90fd774SSteven Rostedt (Red Hat) * assign the commit to the tail. 2874d90fd774SSteven Rostedt (Red Hat) */ 2875d90fd774SSteven Rostedt (Red Hat) again: 2876d90fd774SSteven Rostedt (Red Hat) max_count = cpu_buffer->nr_pages * 100; 2877d90fd774SSteven Rostedt (Red Hat) 28788573636eSSteven Rostedt (Red Hat) while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) { 2879d90fd774SSteven Rostedt (Red Hat) if (RB_WARN_ON(cpu_buffer, !(--max_count))) 2880d90fd774SSteven Rostedt (Red Hat) return; 2881d90fd774SSteven Rostedt (Red Hat) if (RB_WARN_ON(cpu_buffer, 2882d90fd774SSteven Rostedt (Red Hat) rb_is_reader_page(cpu_buffer->tail_page))) 2883d90fd774SSteven Rostedt (Red Hat) return; 2884d90fd774SSteven Rostedt (Red Hat) local_set(&cpu_buffer->commit_page->page->commit, 2885d90fd774SSteven Rostedt (Red Hat) rb_page_write(cpu_buffer->commit_page)); 2886d90fd774SSteven Rostedt (Red Hat) rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 2887d90fd774SSteven Rostedt (Red Hat) /* add barrier to keep gcc from optimizing too much */ 2888d90fd774SSteven Rostedt (Red Hat) barrier(); 2889d90fd774SSteven Rostedt (Red Hat) } 2890d90fd774SSteven Rostedt (Red Hat) while (rb_commit_index(cpu_buffer) != 2891d90fd774SSteven Rostedt (Red Hat) rb_page_write(cpu_buffer->commit_page)) { 2892d90fd774SSteven Rostedt (Red Hat) 2893d90fd774SSteven Rostedt (Red Hat) local_set(&cpu_buffer->commit_page->page->commit, 2894d90fd774SSteven Rostedt (Red Hat) rb_page_write(cpu_buffer->commit_page)); 2895d90fd774SSteven Rostedt (Red Hat) RB_WARN_ON(cpu_buffer, 2896d90fd774SSteven Rostedt (Red Hat) local_read(&cpu_buffer->commit_page->page->commit) & 2897d90fd774SSteven Rostedt (Red Hat) ~RB_WRITE_MASK); 2898d90fd774SSteven Rostedt (Red Hat) barrier(); 2899d90fd774SSteven Rostedt (Red Hat) } 2900d90fd774SSteven Rostedt (Red Hat) 2901d90fd774SSteven Rostedt (Red Hat) /* again, keep gcc from optimizing */ 2902d90fd774SSteven Rostedt (Red Hat) barrier(); 2903d90fd774SSteven Rostedt (Red Hat) 2904d90fd774SSteven Rostedt (Red Hat) /* 2905d90fd774SSteven Rostedt (Red Hat) * If an interrupt came in just after the first while loop 2906d90fd774SSteven Rostedt (Red Hat) * and pushed the tail page forward, we will be left with 2907d90fd774SSteven Rostedt (Red Hat) * a dangling commit that will never go forward. 2908d90fd774SSteven Rostedt (Red Hat) */ 29098573636eSSteven Rostedt (Red Hat) if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page))) 2910d90fd774SSteven Rostedt (Red Hat) goto again; 2911d90fd774SSteven Rostedt (Red Hat) } 2912d90fd774SSteven Rostedt (Red Hat) 291338e11df1SSteven Rostedt (Red Hat) static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2914d90fd774SSteven Rostedt (Red Hat) { 2915d90fd774SSteven Rostedt (Red Hat) unsigned long commits; 2916d90fd774SSteven Rostedt (Red Hat) 2917d90fd774SSteven Rostedt (Red Hat) if (RB_WARN_ON(cpu_buffer, 2918d90fd774SSteven Rostedt (Red Hat) !local_read(&cpu_buffer->committing))) 2919d90fd774SSteven Rostedt (Red Hat) return; 2920d90fd774SSteven Rostedt (Red Hat) 2921d90fd774SSteven Rostedt (Red Hat) again: 2922d90fd774SSteven Rostedt (Red Hat) commits = local_read(&cpu_buffer->commits); 2923d90fd774SSteven Rostedt (Red Hat) /* synchronize with interrupts */ 2924d90fd774SSteven Rostedt (Red Hat) barrier(); 2925d90fd774SSteven Rostedt (Red Hat) if (local_read(&cpu_buffer->committing) == 1) 2926d90fd774SSteven Rostedt (Red Hat) rb_set_commit_to_write(cpu_buffer); 2927d90fd774SSteven Rostedt (Red Hat) 2928d90fd774SSteven Rostedt (Red Hat) local_dec(&cpu_buffer->committing); 2929d90fd774SSteven Rostedt (Red Hat) 2930d90fd774SSteven Rostedt (Red Hat) /* synchronize with interrupts */ 2931d90fd774SSteven Rostedt (Red Hat) barrier(); 2932d90fd774SSteven Rostedt (Red Hat) 2933d90fd774SSteven Rostedt (Red Hat) /* 2934d90fd774SSteven Rostedt (Red Hat) * Need to account for interrupts coming in between the 2935d90fd774SSteven Rostedt (Red Hat) * updating of the commit page and the clearing of the 2936d90fd774SSteven Rostedt (Red Hat) * committing counter. 2937d90fd774SSteven Rostedt (Red Hat) */ 2938d90fd774SSteven Rostedt (Red Hat) if (unlikely(local_read(&cpu_buffer->commits) != commits) && 2939d90fd774SSteven Rostedt (Red Hat) !local_read(&cpu_buffer->committing)) { 2940d90fd774SSteven Rostedt (Red Hat) local_inc(&cpu_buffer->committing); 2941d90fd774SSteven Rostedt (Red Hat) goto again; 2942d90fd774SSteven Rostedt (Red Hat) } 2943d90fd774SSteven Rostedt (Red Hat) } 2944d90fd774SSteven Rostedt (Red Hat) 2945d90fd774SSteven Rostedt (Red Hat) static inline void rb_event_discard(struct ring_buffer_event *event) 2946d90fd774SSteven Rostedt (Red Hat) { 2947dc4e2801STom Zanussi if (extended_time(event)) 2948d90fd774SSteven Rostedt (Red Hat) event = skip_time_extend(event); 2949d90fd774SSteven Rostedt (Red Hat) 2950d90fd774SSteven Rostedt (Red Hat) /* array[0] holds the actual length for the discarded event */ 2951d90fd774SSteven Rostedt (Red Hat) event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2952d90fd774SSteven Rostedt (Red Hat) event->type_len = RINGBUF_TYPE_PADDING; 2953d90fd774SSteven Rostedt (Red Hat) /* time delta must be non zero */ 2954d90fd774SSteven Rostedt (Red Hat) if (!event->time_delta) 2955d90fd774SSteven Rostedt (Red Hat) event->time_delta = 1; 2956d90fd774SSteven Rostedt (Red Hat) } 2957d90fd774SSteven Rostedt (Red Hat) 2958d90fd774SSteven Rostedt (Red Hat) static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2959d90fd774SSteven Rostedt (Red Hat) struct ring_buffer_event *event) 2960d90fd774SSteven Rostedt (Red Hat) { 2961d90fd774SSteven Rostedt (Red Hat) local_inc(&cpu_buffer->entries); 2962d90fd774SSteven Rostedt (Red Hat) rb_end_commit(cpu_buffer); 2963d90fd774SSteven Rostedt (Red Hat) } 2964d90fd774SSteven Rostedt (Red Hat) 2965d90fd774SSteven Rostedt (Red Hat) static __always_inline void 296613292494SSteven Rostedt (VMware) rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) 2967d90fd774SSteven Rostedt (Red Hat) { 296803329f99SSteven Rostedt (VMware) size_t nr_pages; 296903329f99SSteven Rostedt (VMware) size_t dirty; 297003329f99SSteven Rostedt (VMware) size_t full; 2971d90fd774SSteven Rostedt (Red Hat) 2972d90fd774SSteven Rostedt (Red Hat) if (buffer->irq_work.waiters_pending) { 2973d90fd774SSteven Rostedt (Red Hat) buffer->irq_work.waiters_pending = false; 2974d90fd774SSteven Rostedt (Red Hat) /* irq_work_queue() supplies it's own memory barriers */ 2975d90fd774SSteven Rostedt (Red Hat) irq_work_queue(&buffer->irq_work.work); 2976d90fd774SSteven Rostedt (Red Hat) } 2977d90fd774SSteven Rostedt (Red Hat) 2978d90fd774SSteven Rostedt (Red Hat) if (cpu_buffer->irq_work.waiters_pending) { 2979d90fd774SSteven Rostedt (Red Hat) cpu_buffer->irq_work.waiters_pending = false; 2980d90fd774SSteven Rostedt (Red Hat) /* irq_work_queue() supplies it's own memory barriers */ 2981d90fd774SSteven Rostedt (Red Hat) irq_work_queue(&cpu_buffer->irq_work.work); 2982d90fd774SSteven Rostedt (Red Hat) } 2983d90fd774SSteven Rostedt (Red Hat) 298403329f99SSteven Rostedt (VMware) if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched)) 298503329f99SSteven Rostedt (VMware) return; 2986d90fd774SSteven Rostedt (Red Hat) 298703329f99SSteven Rostedt (VMware) if (cpu_buffer->reader_page == cpu_buffer->commit_page) 298803329f99SSteven Rostedt (VMware) return; 298903329f99SSteven Rostedt (VMware) 299003329f99SSteven Rostedt (VMware) if (!cpu_buffer->irq_work.full_waiters_pending) 299103329f99SSteven Rostedt (VMware) return; 299203329f99SSteven Rostedt (VMware) 299303329f99SSteven Rostedt (VMware) cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched); 29942c2b0a78SSteven Rostedt (VMware) 29952c2b0a78SSteven Rostedt (VMware) full = cpu_buffer->shortest_full; 29962c2b0a78SSteven Rostedt (VMware) nr_pages = cpu_buffer->nr_pages; 29972c2b0a78SSteven Rostedt (VMware) dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu); 29982c2b0a78SSteven Rostedt (VMware) if (full && nr_pages && (dirty * 100) <= full * nr_pages) 29992c2b0a78SSteven Rostedt (VMware) return; 30002c2b0a78SSteven Rostedt (VMware) 3001d90fd774SSteven Rostedt (Red Hat) cpu_buffer->irq_work.wakeup_full = true; 3002d90fd774SSteven Rostedt (Red Hat) cpu_buffer->irq_work.full_waiters_pending = false; 3003d90fd774SSteven Rostedt (Red Hat) /* irq_work_queue() supplies it's own memory barriers */ 3004d90fd774SSteven Rostedt (Red Hat) irq_work_queue(&cpu_buffer->irq_work.work); 3005d90fd774SSteven Rostedt (Red Hat) } 3006d90fd774SSteven Rostedt (Red Hat) 3007d90fd774SSteven Rostedt (Red Hat) /* 3008d90fd774SSteven Rostedt (Red Hat) * The lock and unlock are done within a preempt disable section. 3009d90fd774SSteven Rostedt (Red Hat) * The current_context per_cpu variable can only be modified 3010d90fd774SSteven Rostedt (Red Hat) * by the current task between lock and unlock. But it can 3011a0e3a18fSSteven Rostedt (VMware) * be modified more than once via an interrupt. To pass this 3012a0e3a18fSSteven Rostedt (VMware) * information from the lock to the unlock without having to 3013a0e3a18fSSteven Rostedt (VMware) * access the 'in_interrupt()' functions again (which do show 3014a0e3a18fSSteven Rostedt (VMware) * a bit of overhead in something as critical as function tracing, 3015a0e3a18fSSteven Rostedt (VMware) * we use a bitmask trick. 3016d90fd774SSteven Rostedt (Red Hat) * 3017a0e3a18fSSteven Rostedt (VMware) * bit 0 = NMI context 3018a0e3a18fSSteven Rostedt (VMware) * bit 1 = IRQ context 3019a0e3a18fSSteven Rostedt (VMware) * bit 2 = SoftIRQ context 3020a0e3a18fSSteven Rostedt (VMware) * bit 3 = normal context. 3021d90fd774SSteven Rostedt (Red Hat) * 3022a0e3a18fSSteven Rostedt (VMware) * This works because this is the order of contexts that can 3023a0e3a18fSSteven Rostedt (VMware) * preempt other contexts. A SoftIRQ never preempts an IRQ 3024a0e3a18fSSteven Rostedt (VMware) * context. 3025a0e3a18fSSteven Rostedt (VMware) * 3026a0e3a18fSSteven Rostedt (VMware) * When the context is determined, the corresponding bit is 3027a0e3a18fSSteven Rostedt (VMware) * checked and set (if it was set, then a recursion of that context 3028a0e3a18fSSteven Rostedt (VMware) * happened). 3029a0e3a18fSSteven Rostedt (VMware) * 3030a0e3a18fSSteven Rostedt (VMware) * On unlock, we need to clear this bit. To do so, just subtract 3031a0e3a18fSSteven Rostedt (VMware) * 1 from the current_context and AND it to itself. 3032a0e3a18fSSteven Rostedt (VMware) * 3033a0e3a18fSSteven Rostedt (VMware) * (binary) 3034a0e3a18fSSteven Rostedt (VMware) * 101 - 1 = 100 3035a0e3a18fSSteven Rostedt (VMware) * 101 & 100 = 100 (clearing bit zero) 3036a0e3a18fSSteven Rostedt (VMware) * 3037a0e3a18fSSteven Rostedt (VMware) * 1010 - 1 = 1001 3038a0e3a18fSSteven Rostedt (VMware) * 1010 & 1001 = 1000 (clearing bit 1) 3039a0e3a18fSSteven Rostedt (VMware) * 3040a0e3a18fSSteven Rostedt (VMware) * The least significant bit can be cleared this way, and it 3041a0e3a18fSSteven Rostedt (VMware) * just so happens that it is the same bit corresponding to 3042a0e3a18fSSteven Rostedt (VMware) * the current context. 3043d90fd774SSteven Rostedt (Red Hat) */ 3044d90fd774SSteven Rostedt (Red Hat) 3045d90fd774SSteven Rostedt (Red Hat) static __always_inline int 3046d90fd774SSteven Rostedt (Red Hat) trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) 3047d90fd774SSteven Rostedt (Red Hat) { 3048a0e3a18fSSteven Rostedt (VMware) unsigned int val = cpu_buffer->current_context; 3049a0e3a18fSSteven Rostedt (VMware) unsigned long pc = preempt_count(); 3050a0e3a18fSSteven Rostedt (VMware) int bit; 3051a0e3a18fSSteven Rostedt (VMware) 3052a0e3a18fSSteven Rostedt (VMware) if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) 3053a0e3a18fSSteven Rostedt (VMware) bit = RB_CTX_NORMAL; 3054a0e3a18fSSteven Rostedt (VMware) else 3055a0e3a18fSSteven Rostedt (VMware) bit = pc & NMI_MASK ? RB_CTX_NMI : 30560164e0d7SSteven Rostedt (VMware) pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; 3057a0e3a18fSSteven Rostedt (VMware) 30588e012066SSteven Rostedt (VMware) if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) 3059d90fd774SSteven Rostedt (Red Hat) return 1; 3060d90fd774SSteven Rostedt (Red Hat) 30618e012066SSteven Rostedt (VMware) val |= (1 << (bit + cpu_buffer->nest)); 3062a0e3a18fSSteven Rostedt (VMware) cpu_buffer->current_context = val; 3063d90fd774SSteven Rostedt (Red Hat) 3064d90fd774SSteven Rostedt (Red Hat) return 0; 3065d90fd774SSteven Rostedt (Red Hat) } 3066d90fd774SSteven Rostedt (Red Hat) 3067d90fd774SSteven Rostedt (Red Hat) static __always_inline void 3068d90fd774SSteven Rostedt (Red Hat) trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) 3069d90fd774SSteven Rostedt (Red Hat) { 30708e012066SSteven Rostedt (VMware) cpu_buffer->current_context &= 30718e012066SSteven Rostedt (VMware) cpu_buffer->current_context - (1 << cpu_buffer->nest); 30728e012066SSteven Rostedt (VMware) } 30738e012066SSteven Rostedt (VMware) 30748e012066SSteven Rostedt (VMware) /* The recursive locking above uses 4 bits */ 30758e012066SSteven Rostedt (VMware) #define NESTED_BITS 4 30768e012066SSteven Rostedt (VMware) 30778e012066SSteven Rostedt (VMware) /** 30788e012066SSteven Rostedt (VMware) * ring_buffer_nest_start - Allow to trace while nested 30798e012066SSteven Rostedt (VMware) * @buffer: The ring buffer to modify 30808e012066SSteven Rostedt (VMware) * 30816167c205SSteven Rostedt (VMware) * The ring buffer has a safety mechanism to prevent recursion. 30828e012066SSteven Rostedt (VMware) * But there may be a case where a trace needs to be done while 30838e012066SSteven Rostedt (VMware) * tracing something else. In this case, calling this function 30848e012066SSteven Rostedt (VMware) * will allow this function to nest within a currently active 30858e012066SSteven Rostedt (VMware) * ring_buffer_lock_reserve(). 30868e012066SSteven Rostedt (VMware) * 30878e012066SSteven Rostedt (VMware) * Call this function before calling another ring_buffer_lock_reserve() and 30888e012066SSteven Rostedt (VMware) * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). 30898e012066SSteven Rostedt (VMware) */ 309013292494SSteven Rostedt (VMware) void ring_buffer_nest_start(struct trace_buffer *buffer) 30918e012066SSteven Rostedt (VMware) { 30928e012066SSteven Rostedt (VMware) struct ring_buffer_per_cpu *cpu_buffer; 30938e012066SSteven Rostedt (VMware) int cpu; 30948e012066SSteven Rostedt (VMware) 30958e012066SSteven Rostedt (VMware) /* Enabled by ring_buffer_nest_end() */ 30968e012066SSteven Rostedt (VMware) preempt_disable_notrace(); 30978e012066SSteven Rostedt (VMware) cpu = raw_smp_processor_id(); 30988e012066SSteven Rostedt (VMware) cpu_buffer = buffer->buffers[cpu]; 30996167c205SSteven Rostedt (VMware) /* This is the shift value for the above recursive locking */ 31008e012066SSteven Rostedt (VMware) cpu_buffer->nest += NESTED_BITS; 31018e012066SSteven Rostedt (VMware) } 31028e012066SSteven Rostedt (VMware) 31038e012066SSteven Rostedt (VMware) /** 31048e012066SSteven Rostedt (VMware) * ring_buffer_nest_end - Allow to trace while nested 31058e012066SSteven Rostedt (VMware) * @buffer: The ring buffer to modify 31068e012066SSteven Rostedt (VMware) * 31078e012066SSteven Rostedt (VMware) * Must be called after ring_buffer_nest_start() and after the 31088e012066SSteven Rostedt (VMware) * ring_buffer_unlock_commit(). 31098e012066SSteven Rostedt (VMware) */ 311013292494SSteven Rostedt (VMware) void ring_buffer_nest_end(struct trace_buffer *buffer) 31118e012066SSteven Rostedt (VMware) { 31128e012066SSteven Rostedt (VMware) struct ring_buffer_per_cpu *cpu_buffer; 31138e012066SSteven Rostedt (VMware) int cpu; 31148e012066SSteven Rostedt (VMware) 31158e012066SSteven Rostedt (VMware) /* disabled by ring_buffer_nest_start() */ 31168e012066SSteven Rostedt (VMware) cpu = raw_smp_processor_id(); 31178e012066SSteven Rostedt (VMware) cpu_buffer = buffer->buffers[cpu]; 31186167c205SSteven Rostedt (VMware) /* This is the shift value for the above recursive locking */ 31198e012066SSteven Rostedt (VMware) cpu_buffer->nest -= NESTED_BITS; 31208e012066SSteven Rostedt (VMware) preempt_enable_notrace(); 3121d90fd774SSteven Rostedt (Red Hat) } 3122d90fd774SSteven Rostedt (Red Hat) 3123d90fd774SSteven Rostedt (Red Hat) /** 3124d90fd774SSteven Rostedt (Red Hat) * ring_buffer_unlock_commit - commit a reserved 3125d90fd774SSteven Rostedt (Red Hat) * @buffer: The buffer to commit to 3126d90fd774SSteven Rostedt (Red Hat) * @event: The event pointer to commit. 3127d90fd774SSteven Rostedt (Red Hat) * 3128d90fd774SSteven Rostedt (Red Hat) * This commits the data to the ring buffer, and releases any locks held. 3129d90fd774SSteven Rostedt (Red Hat) * 3130d90fd774SSteven Rostedt (Red Hat) * Must be paired with ring_buffer_lock_reserve. 3131d90fd774SSteven Rostedt (Red Hat) */ 313213292494SSteven Rostedt (VMware) int ring_buffer_unlock_commit(struct trace_buffer *buffer, 3133d90fd774SSteven Rostedt (Red Hat) struct ring_buffer_event *event) 3134d90fd774SSteven Rostedt (Red Hat) { 3135d90fd774SSteven Rostedt (Red Hat) struct ring_buffer_per_cpu *cpu_buffer; 3136d90fd774SSteven Rostedt (Red Hat) int cpu = raw_smp_processor_id(); 3137d90fd774SSteven Rostedt (Red Hat) 3138d90fd774SSteven Rostedt (Red Hat) cpu_buffer = buffer->buffers[cpu]; 3139d90fd774SSteven Rostedt (Red Hat) 3140d90fd774SSteven Rostedt (Red Hat) rb_commit(cpu_buffer, event); 3141d90fd774SSteven Rostedt (Red Hat) 3142d90fd774SSteven Rostedt (Red Hat) rb_wakeups(buffer, cpu_buffer); 3143d90fd774SSteven Rostedt (Red Hat) 3144d90fd774SSteven Rostedt (Red Hat) trace_recursive_unlock(cpu_buffer); 3145d90fd774SSteven Rostedt (Red Hat) 3146d90fd774SSteven Rostedt (Red Hat) preempt_enable_notrace(); 3147d90fd774SSteven Rostedt (Red Hat) 3148d90fd774SSteven Rostedt (Red Hat) return 0; 3149d90fd774SSteven Rostedt (Red Hat) } 3150d90fd774SSteven Rostedt (Red Hat) EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); 3151a4543a2fSSteven Rostedt (Red Hat) 31526634ff26SSteven Rostedt static struct ring_buffer_event * 31536634ff26SSteven Rostedt __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 3154fcc742eaSSteven Rostedt (Red Hat) struct rb_event_info *info) 31556634ff26SSteven Rostedt { 31566634ff26SSteven Rostedt struct ring_buffer_event *event; 3157fcc742eaSSteven Rostedt (Red Hat) struct buffer_page *tail_page; 3158a389d86fSSteven Rostedt (VMware) unsigned long tail, write, w; 315910464b4aSSteven Rostedt (VMware) bool a_ok; 316010464b4aSSteven Rostedt (VMware) bool b_ok; 316169d1b839SSteven Rostedt 31628573636eSSteven Rostedt (Red Hat) /* Don't let the compiler play games with cpu_buffer->tail_page */ 31638573636eSSteven Rostedt (Red Hat) tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page); 3164a389d86fSSteven Rostedt (VMware) 3165a389d86fSSteven Rostedt (VMware) /*A*/ w = local_read(&tail_page->write) & RB_WRITE_MASK; 3166a389d86fSSteven Rostedt (VMware) barrier(); 316758fbc3c6SSteven Rostedt (VMware) b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); 316858fbc3c6SSteven Rostedt (VMware) a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); 3169a389d86fSSteven Rostedt (VMware) barrier(); 3170a389d86fSSteven Rostedt (VMware) info->ts = rb_time_stamp(cpu_buffer->buffer); 3171a389d86fSSteven Rostedt (VMware) 317258fbc3c6SSteven Rostedt (VMware) if ((info->add_timestamp & RB_ADD_STAMP_ABSOLUTE)) { 3173a389d86fSSteven Rostedt (VMware) info->delta = info->ts; 3174a389d86fSSteven Rostedt (VMware) } else { 3175a389d86fSSteven Rostedt (VMware) /* 317658fbc3c6SSteven Rostedt (VMware) * If interrupting an event time update, we may need an 317758fbc3c6SSteven Rostedt (VMware) * absolute timestamp. 3178a389d86fSSteven Rostedt (VMware) * Don't bother if this is the start of a new page (w == 0). 3179a389d86fSSteven Rostedt (VMware) */ 318058fbc3c6SSteven Rostedt (VMware) if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) { 31817c4b4a51SSteven Rostedt (VMware) info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND; 3182fcc742eaSSteven Rostedt (Red Hat) info->length += RB_LEN_TIME_EXTEND; 318358fbc3c6SSteven Rostedt (VMware) } else { 318458fbc3c6SSteven Rostedt (VMware) info->delta = info->ts - info->after; 318558fbc3c6SSteven Rostedt (VMware) if (unlikely(test_time_stamp(info->delta))) { 318658fbc3c6SSteven Rostedt (VMware) info->add_timestamp |= RB_ADD_STAMP_EXTEND; 318758fbc3c6SSteven Rostedt (VMware) info->length += RB_LEN_TIME_EXTEND; 318858fbc3c6SSteven Rostedt (VMware) } 318958fbc3c6SSteven Rostedt (VMware) } 319058fbc3c6SSteven Rostedt (VMware) } 319177ae365eSSteven Rostedt 319210464b4aSSteven Rostedt (VMware) /*B*/ rb_time_set(&cpu_buffer->before_stamp, info->ts); 3193a389d86fSSteven Rostedt (VMware) 3194a389d86fSSteven Rostedt (VMware) /*C*/ write = local_add_return(info->length, &tail_page->write); 319577ae365eSSteven Rostedt 319677ae365eSSteven Rostedt /* set write to only the index of the write */ 319777ae365eSSteven Rostedt write &= RB_WRITE_MASK; 3198a389d86fSSteven Rostedt (VMware) 3199fcc742eaSSteven Rostedt (Red Hat) tail = write - info->length; 32006634ff26SSteven Rostedt 3201a389d86fSSteven Rostedt (VMware) /* See if we shot pass the end of this buffer page */ 3202a389d86fSSteven Rostedt (VMware) if (unlikely(write > BUF_PAGE_SIZE)) { 3203a389d86fSSteven Rostedt (VMware) if (tail != w) { 3204a389d86fSSteven Rostedt (VMware) /* before and after may now different, fix it up*/ 320558fbc3c6SSteven Rostedt (VMware) b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); 320658fbc3c6SSteven Rostedt (VMware) a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); 320758fbc3c6SSteven Rostedt (VMware) if (a_ok && b_ok && info->before != info->after) 320858fbc3c6SSteven Rostedt (VMware) (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, 320958fbc3c6SSteven Rostedt (VMware) info->before, info->after); 3210a389d86fSSteven Rostedt (VMware) } 3211a389d86fSSteven Rostedt (VMware) return rb_move_tail(cpu_buffer, tail, info); 3212a389d86fSSteven Rostedt (VMware) } 3213a389d86fSSteven Rostedt (VMware) 3214a389d86fSSteven Rostedt (VMware) if (likely(tail == w)) { 3215a389d86fSSteven Rostedt (VMware) u64 save_before; 321610464b4aSSteven Rostedt (VMware) bool s_ok; 3217a389d86fSSteven Rostedt (VMware) 3218a389d86fSSteven Rostedt (VMware) /* Nothing interrupted us between A and C */ 321910464b4aSSteven Rostedt (VMware) /*D*/ rb_time_set(&cpu_buffer->write_stamp, info->ts); 3220a389d86fSSteven Rostedt (VMware) barrier(); 322110464b4aSSteven Rostedt (VMware) /*E*/ s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before); 322210464b4aSSteven Rostedt (VMware) RB_WARN_ON(cpu_buffer, !s_ok); 32237c4b4a51SSteven Rostedt (VMware) if (likely(!(info->add_timestamp & 32247c4b4a51SSteven Rostedt (VMware) (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)))) 3225a389d86fSSteven Rostedt (VMware) /* This did not interrupt any time update */ 322658fbc3c6SSteven Rostedt (VMware) info->delta = info->ts - info->after; 3227a389d86fSSteven Rostedt (VMware) else 3228a389d86fSSteven Rostedt (VMware) /* Just use full timestamp for inerrupting event */ 3229a389d86fSSteven Rostedt (VMware) info->delta = info->ts; 3230a389d86fSSteven Rostedt (VMware) barrier(); 3231a389d86fSSteven Rostedt (VMware) if (unlikely(info->ts != save_before)) { 3232a389d86fSSteven Rostedt (VMware) /* SLOW PATH - Interrupted between C and E */ 3233a389d86fSSteven Rostedt (VMware) 323458fbc3c6SSteven Rostedt (VMware) a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); 323510464b4aSSteven Rostedt (VMware) RB_WARN_ON(cpu_buffer, !a_ok); 323610464b4aSSteven Rostedt (VMware) 3237a389d86fSSteven Rostedt (VMware) /* Write stamp must only go forward */ 323858fbc3c6SSteven Rostedt (VMware) if (save_before > info->after) { 3239a389d86fSSteven Rostedt (VMware) /* 3240a389d86fSSteven Rostedt (VMware) * We do not care about the result, only that 3241a389d86fSSteven Rostedt (VMware) * it gets updated atomically. 3242a389d86fSSteven Rostedt (VMware) */ 324358fbc3c6SSteven Rostedt (VMware) (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, 324458fbc3c6SSteven Rostedt (VMware) info->after, save_before); 3245a389d86fSSteven Rostedt (VMware) } 3246a389d86fSSteven Rostedt (VMware) } 3247a389d86fSSteven Rostedt (VMware) } else { 3248a389d86fSSteven Rostedt (VMware) u64 ts; 3249a389d86fSSteven Rostedt (VMware) /* SLOW PATH - Interrupted between A and C */ 325058fbc3c6SSteven Rostedt (VMware) a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); 325110464b4aSSteven Rostedt (VMware) /* Was interrupted before here, write_stamp must be valid */ 325210464b4aSSteven Rostedt (VMware) RB_WARN_ON(cpu_buffer, !a_ok); 3253a389d86fSSteven Rostedt (VMware) ts = rb_time_stamp(cpu_buffer->buffer); 3254a389d86fSSteven Rostedt (VMware) barrier(); 3255a389d86fSSteven Rostedt (VMware) /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && 325658fbc3c6SSteven Rostedt (VMware) info->after < ts) { 3257a389d86fSSteven Rostedt (VMware) /* Nothing came after this event between C and E */ 325858fbc3c6SSteven Rostedt (VMware) info->delta = ts - info->after; 325958fbc3c6SSteven Rostedt (VMware) (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, 326058fbc3c6SSteven Rostedt (VMware) info->after, info->ts); 3261a389d86fSSteven Rostedt (VMware) info->ts = ts; 3262a389d86fSSteven Rostedt (VMware) } else { 3263a389d86fSSteven Rostedt (VMware) /* 3264a389d86fSSteven Rostedt (VMware) * Interrupted beween C and E: 3265a389d86fSSteven Rostedt (VMware) * Lost the previous events time stamp. Just set the 3266a389d86fSSteven Rostedt (VMware) * delta to zero, and this will be the same time as 3267a389d86fSSteven Rostedt (VMware) * the event this event interrupted. And the events that 3268a389d86fSSteven Rostedt (VMware) * came after this will still be correct (as they would 3269a389d86fSSteven Rostedt (VMware) * have built their delta on the previous event. 3270a389d86fSSteven Rostedt (VMware) */ 3271a389d86fSSteven Rostedt (VMware) info->delta = 0; 3272a389d86fSSteven Rostedt (VMware) } 32737c4b4a51SSteven Rostedt (VMware) info->add_timestamp &= ~RB_ADD_STAMP_FORCE; 3274a389d86fSSteven Rostedt (VMware) } 3275a389d86fSSteven Rostedt (VMware) 3276b7dc42fdSSteven Rostedt (Red Hat) /* 3277b7dc42fdSSteven Rostedt (Red Hat) * If this is the first commit on the page, then it has the same 3278b7dc42fdSSteven Rostedt (Red Hat) * timestamp as the page itself. 3279b7dc42fdSSteven Rostedt (Red Hat) */ 32807c4b4a51SSteven Rostedt (VMware) if (unlikely(!tail && !(info->add_timestamp & 32817c4b4a51SSteven Rostedt (VMware) (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)))) 3282b7dc42fdSSteven Rostedt (Red Hat) info->delta = 0; 3283b7dc42fdSSteven Rostedt (Red Hat) 32846634ff26SSteven Rostedt /* We reserved something on the buffer */ 3285b7dc42fdSSteven Rostedt (Red Hat) 32866634ff26SSteven Rostedt event = __rb_page_index(tail_page, tail); 3287fcc742eaSSteven Rostedt (Red Hat) rb_update_event(cpu_buffer, event, info); 32886634ff26SSteven Rostedt 32896634ff26SSteven Rostedt local_inc(&tail_page->entries); 32906634ff26SSteven Rostedt 3291b7dc42fdSSteven Rostedt (Red Hat) /* 3292b7dc42fdSSteven Rostedt (Red Hat) * If this is the first commit on the page, then update 3293b7dc42fdSSteven Rostedt (Red Hat) * its timestamp. 3294b7dc42fdSSteven Rostedt (Red Hat) */ 329575b21c6dSSteven Rostedt (VMware) if (unlikely(!tail)) 3296b7dc42fdSSteven Rostedt (Red Hat) tail_page->page->time_stamp = info->ts; 3297b7dc42fdSSteven Rostedt (Red Hat) 3298c64e148aSVaibhav Nagarnaik /* account for these added bytes */ 3299fcc742eaSSteven Rostedt (Red Hat) local_add(info->length, &cpu_buffer->entries_bytes); 3300c64e148aSVaibhav Nagarnaik 33016634ff26SSteven Rostedt return event; 33026634ff26SSteven Rostedt } 33036634ff26SSteven Rostedt 3304fa7ffb39SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event * 330513292494SSteven Rostedt (VMware) rb_reserve_next_event(struct trace_buffer *buffer, 330662f0b3ebSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer, 33071cd8d735SSteven Rostedt unsigned long length) 33087a8e76a3SSteven Rostedt { 33097a8e76a3SSteven Rostedt struct ring_buffer_event *event; 3310fcc742eaSSteven Rostedt (Red Hat) struct rb_event_info info; 3311818e3dd3SSteven Rostedt int nr_loops = 0; 331258fbc3c6SSteven Rostedt (VMware) int add_ts_default; 33137a8e76a3SSteven Rostedt 3314fa743953SSteven Rostedt rb_start_commit(cpu_buffer); 3315a389d86fSSteven Rostedt (VMware) /* The commit page can not change after this */ 3316fa743953SSteven Rostedt 331785bac32cSSteven Rostedt #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 331862f0b3ebSSteven Rostedt /* 331962f0b3ebSSteven Rostedt * Due to the ability to swap a cpu buffer from a buffer 332062f0b3ebSSteven Rostedt * it is possible it was swapped before we committed. 332162f0b3ebSSteven Rostedt * (committing stops a swap). We check for it here and 332262f0b3ebSSteven Rostedt * if it happened, we have to fail the write. 332362f0b3ebSSteven Rostedt */ 332462f0b3ebSSteven Rostedt barrier(); 33256aa7de05SMark Rutland if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) { 332662f0b3ebSSteven Rostedt local_dec(&cpu_buffer->committing); 332762f0b3ebSSteven Rostedt local_dec(&cpu_buffer->commits); 332862f0b3ebSSteven Rostedt return NULL; 332962f0b3ebSSteven Rostedt } 333085bac32cSSteven Rostedt #endif 3331b7dc42fdSSteven Rostedt (Red Hat) 3332fcc742eaSSteven Rostedt (Red Hat) info.length = rb_calculate_event_length(length); 333358fbc3c6SSteven Rostedt (VMware) 333458fbc3c6SSteven Rostedt (VMware) if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) { 333558fbc3c6SSteven Rostedt (VMware) add_ts_default = RB_ADD_STAMP_ABSOLUTE; 333658fbc3c6SSteven Rostedt (VMware) info.length += RB_LEN_TIME_EXTEND; 333758fbc3c6SSteven Rostedt (VMware) } else { 333858fbc3c6SSteven Rostedt (VMware) add_ts_default = RB_ADD_STAMP_NONE; 333958fbc3c6SSteven Rostedt (VMware) } 334058fbc3c6SSteven Rostedt (VMware) 3341a4543a2fSSteven Rostedt (Red Hat) again: 334258fbc3c6SSteven Rostedt (VMware) info.add_timestamp = add_ts_default; 3343b7dc42fdSSteven Rostedt (Red Hat) info.delta = 0; 3344b7dc42fdSSteven Rostedt (Red Hat) 3345818e3dd3SSteven Rostedt /* 3346818e3dd3SSteven Rostedt * We allow for interrupts to reenter here and do a trace. 3347818e3dd3SSteven Rostedt * If one does, it will cause this original code to loop 3348818e3dd3SSteven Rostedt * back here. Even with heavy interrupts happening, this 3349818e3dd3SSteven Rostedt * should only happen a few times in a row. If this happens 3350818e3dd3SSteven Rostedt * 1000 times in a row, there must be either an interrupt 3351818e3dd3SSteven Rostedt * storm or we have something buggy. 3352818e3dd3SSteven Rostedt * Bail! 3353818e3dd3SSteven Rostedt */ 33543e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 3355fa743953SSteven Rostedt goto out_fail; 3356818e3dd3SSteven Rostedt 3357fcc742eaSSteven Rostedt (Red Hat) event = __rb_reserve_next(cpu_buffer, &info); 3358fcc742eaSSteven Rostedt (Red Hat) 3359bd1b7cd3SSteven Rostedt (Red Hat) if (unlikely(PTR_ERR(event) == -EAGAIN)) { 336058fbc3c6SSteven Rostedt (VMware) if (info.add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND)) 3361bd1b7cd3SSteven Rostedt (Red Hat) info.length -= RB_LEN_TIME_EXTEND; 3362bf41a158SSteven Rostedt goto again; 3363bd1b7cd3SSteven Rostedt (Red Hat) } 33647a8e76a3SSteven Rostedt 3365a389d86fSSteven Rostedt (VMware) if (likely(event)) 33667a8e76a3SSteven Rostedt return event; 3367fa743953SSteven Rostedt out_fail: 3368fa743953SSteven Rostedt rb_end_commit(cpu_buffer); 3369fa743953SSteven Rostedt return NULL; 33707a8e76a3SSteven Rostedt } 33717a8e76a3SSteven Rostedt 33727a8e76a3SSteven Rostedt /** 33737a8e76a3SSteven Rostedt * ring_buffer_lock_reserve - reserve a part of the buffer 33747a8e76a3SSteven Rostedt * @buffer: the ring buffer to reserve from 33757a8e76a3SSteven Rostedt * @length: the length of the data to reserve (excluding event header) 33767a8e76a3SSteven Rostedt * 33776167c205SSteven Rostedt (VMware) * Returns a reserved event on the ring buffer to copy directly to. 33787a8e76a3SSteven Rostedt * The user of this interface will need to get the body to write into 33797a8e76a3SSteven Rostedt * and can use the ring_buffer_event_data() interface. 33807a8e76a3SSteven Rostedt * 33817a8e76a3SSteven Rostedt * The length is the length of the data needed, not the event length 33827a8e76a3SSteven Rostedt * which also includes the event header. 33837a8e76a3SSteven Rostedt * 33847a8e76a3SSteven Rostedt * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. 33857a8e76a3SSteven Rostedt * If NULL is returned, then nothing has been allocated or locked. 33867a8e76a3SSteven Rostedt */ 33877a8e76a3SSteven Rostedt struct ring_buffer_event * 338813292494SSteven Rostedt (VMware) ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length) 33897a8e76a3SSteven Rostedt { 33907a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 33917a8e76a3SSteven Rostedt struct ring_buffer_event *event; 33925168ae50SSteven Rostedt int cpu; 33937a8e76a3SSteven Rostedt 3394bf41a158SSteven Rostedt /* If we are tracing schedule, we don't want to recurse */ 33955168ae50SSteven Rostedt preempt_disable_notrace(); 3396bf41a158SSteven Rostedt 33973205f806SSteven Rostedt (Red Hat) if (unlikely(atomic_read(&buffer->record_disabled))) 339858a09ec6SSteven Rostedt (Red Hat) goto out; 3399261842b7SSteven Rostedt 34007a8e76a3SSteven Rostedt cpu = raw_smp_processor_id(); 34017a8e76a3SSteven Rostedt 34023205f806SSteven Rostedt (Red Hat) if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask))) 3403d769041fSSteven Rostedt goto out; 34047a8e76a3SSteven Rostedt 34057a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 34067a8e76a3SSteven Rostedt 34073205f806SSteven Rostedt (Red Hat) if (unlikely(atomic_read(&cpu_buffer->record_disabled))) 3408d769041fSSteven Rostedt goto out; 34097a8e76a3SSteven Rostedt 34103205f806SSteven Rostedt (Red Hat) if (unlikely(length > BUF_MAX_DATA_SIZE)) 3411bf41a158SSteven Rostedt goto out; 34127a8e76a3SSteven Rostedt 341358a09ec6SSteven Rostedt (Red Hat) if (unlikely(trace_recursive_lock(cpu_buffer))) 341458a09ec6SSteven Rostedt (Red Hat) goto out; 341558a09ec6SSteven Rostedt (Red Hat) 341662f0b3ebSSteven Rostedt event = rb_reserve_next_event(buffer, cpu_buffer, length); 34177a8e76a3SSteven Rostedt if (!event) 341858a09ec6SSteven Rostedt (Red Hat) goto out_unlock; 34197a8e76a3SSteven Rostedt 34207a8e76a3SSteven Rostedt return event; 34217a8e76a3SSteven Rostedt 342258a09ec6SSteven Rostedt (Red Hat) out_unlock: 342358a09ec6SSteven Rostedt (Red Hat) trace_recursive_unlock(cpu_buffer); 3424d769041fSSteven Rostedt out: 34255168ae50SSteven Rostedt preempt_enable_notrace(); 34267a8e76a3SSteven Rostedt return NULL; 34277a8e76a3SSteven Rostedt } 3428c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 34297a8e76a3SSteven Rostedt 3430a1863c21SSteven Rostedt /* 3431a1863c21SSteven Rostedt * Decrement the entries to the page that an event is on. 3432a1863c21SSteven Rostedt * The event does not even need to exist, only the pointer 3433a1863c21SSteven Rostedt * to the page it is on. This may only be called before the commit 3434a1863c21SSteven Rostedt * takes place. 3435a1863c21SSteven Rostedt */ 3436a1863c21SSteven Rostedt static inline void 3437a1863c21SSteven Rostedt rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, 3438a1863c21SSteven Rostedt struct ring_buffer_event *event) 3439a1863c21SSteven Rostedt { 3440a1863c21SSteven Rostedt unsigned long addr = (unsigned long)event; 3441a1863c21SSteven Rostedt struct buffer_page *bpage = cpu_buffer->commit_page; 3442a1863c21SSteven Rostedt struct buffer_page *start; 3443a1863c21SSteven Rostedt 3444a1863c21SSteven Rostedt addr &= PAGE_MASK; 3445a1863c21SSteven Rostedt 3446a1863c21SSteven Rostedt /* Do the likely case first */ 3447a1863c21SSteven Rostedt if (likely(bpage->page == (void *)addr)) { 3448a1863c21SSteven Rostedt local_dec(&bpage->entries); 3449a1863c21SSteven Rostedt return; 3450a1863c21SSteven Rostedt } 3451a1863c21SSteven Rostedt 3452a1863c21SSteven Rostedt /* 3453a1863c21SSteven Rostedt * Because the commit page may be on the reader page we 3454a1863c21SSteven Rostedt * start with the next page and check the end loop there. 3455a1863c21SSteven Rostedt */ 3456a1863c21SSteven Rostedt rb_inc_page(cpu_buffer, &bpage); 3457a1863c21SSteven Rostedt start = bpage; 3458a1863c21SSteven Rostedt do { 3459a1863c21SSteven Rostedt if (bpage->page == (void *)addr) { 3460a1863c21SSteven Rostedt local_dec(&bpage->entries); 3461a1863c21SSteven Rostedt return; 3462a1863c21SSteven Rostedt } 3463a1863c21SSteven Rostedt rb_inc_page(cpu_buffer, &bpage); 3464a1863c21SSteven Rostedt } while (bpage != start); 3465a1863c21SSteven Rostedt 3466a1863c21SSteven Rostedt /* commit not part of this buffer?? */ 3467a1863c21SSteven Rostedt RB_WARN_ON(cpu_buffer, 1); 3468a1863c21SSteven Rostedt } 3469a1863c21SSteven Rostedt 34707a8e76a3SSteven Rostedt /** 3471fa1b47ddSSteven Rostedt * ring_buffer_commit_discard - discard an event that has not been committed 3472fa1b47ddSSteven Rostedt * @buffer: the ring buffer 3473fa1b47ddSSteven Rostedt * @event: non committed event to discard 3474fa1b47ddSSteven Rostedt * 3475dc892f73SSteven Rostedt * Sometimes an event that is in the ring buffer needs to be ignored. 3476dc892f73SSteven Rostedt * This function lets the user discard an event in the ring buffer 3477dc892f73SSteven Rostedt * and then that event will not be read later. 3478dc892f73SSteven Rostedt * 34796167c205SSteven Rostedt (VMware) * This function only works if it is called before the item has been 3480dc892f73SSteven Rostedt * committed. It will try to free the event from the ring buffer 3481fa1b47ddSSteven Rostedt * if another event has not been added behind it. 3482fa1b47ddSSteven Rostedt * 3483fa1b47ddSSteven Rostedt * If another event has been added behind it, it will set the event 3484fa1b47ddSSteven Rostedt * up as discarded, and perform the commit. 3485fa1b47ddSSteven Rostedt * 3486fa1b47ddSSteven Rostedt * If this function is called, do not call ring_buffer_unlock_commit on 3487fa1b47ddSSteven Rostedt * the event. 3488fa1b47ddSSteven Rostedt */ 348913292494SSteven Rostedt (VMware) void ring_buffer_discard_commit(struct trace_buffer *buffer, 3490fa1b47ddSSteven Rostedt struct ring_buffer_event *event) 3491fa1b47ddSSteven Rostedt { 3492fa1b47ddSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 3493fa1b47ddSSteven Rostedt int cpu; 3494fa1b47ddSSteven Rostedt 3495fa1b47ddSSteven Rostedt /* The event is discarded regardless */ 3496f3b9aae1SFrederic Weisbecker rb_event_discard(event); 3497fa1b47ddSSteven Rostedt 3498fa743953SSteven Rostedt cpu = smp_processor_id(); 3499fa743953SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 3500fa743953SSteven Rostedt 3501fa1b47ddSSteven Rostedt /* 3502fa1b47ddSSteven Rostedt * This must only be called if the event has not been 3503fa1b47ddSSteven Rostedt * committed yet. Thus we can assume that preemption 3504fa1b47ddSSteven Rostedt * is still disabled. 3505fa1b47ddSSteven Rostedt */ 3506fa743953SSteven Rostedt RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); 3507fa1b47ddSSteven Rostedt 3508a1863c21SSteven Rostedt rb_decrement_entry(cpu_buffer, event); 35090f2541d2SSteven Rostedt if (rb_try_to_discard(cpu_buffer, event)) 3510fa1b47ddSSteven Rostedt goto out; 3511fa1b47ddSSteven Rostedt 3512fa1b47ddSSteven Rostedt out: 3513fa743953SSteven Rostedt rb_end_commit(cpu_buffer); 3514fa1b47ddSSteven Rostedt 351558a09ec6SSteven Rostedt (Red Hat) trace_recursive_unlock(cpu_buffer); 3516f3b9aae1SFrederic Weisbecker 35175168ae50SSteven Rostedt preempt_enable_notrace(); 3518fa1b47ddSSteven Rostedt 3519fa1b47ddSSteven Rostedt } 3520fa1b47ddSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); 3521fa1b47ddSSteven Rostedt 3522fa1b47ddSSteven Rostedt /** 35237a8e76a3SSteven Rostedt * ring_buffer_write - write data to the buffer without reserving 35247a8e76a3SSteven Rostedt * @buffer: The ring buffer to write to. 35257a8e76a3SSteven Rostedt * @length: The length of the data being written (excluding the event header) 35267a8e76a3SSteven Rostedt * @data: The data to write to the buffer. 35277a8e76a3SSteven Rostedt * 35287a8e76a3SSteven Rostedt * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as 35297a8e76a3SSteven Rostedt * one function. If you already have the data to write to the buffer, it 35307a8e76a3SSteven Rostedt * may be easier to simply call this function. 35317a8e76a3SSteven Rostedt * 35327a8e76a3SSteven Rostedt * Note, like ring_buffer_lock_reserve, the length is the length of the data 35337a8e76a3SSteven Rostedt * and not the length of the event which would hold the header. 35347a8e76a3SSteven Rostedt */ 353513292494SSteven Rostedt (VMware) int ring_buffer_write(struct trace_buffer *buffer, 35367a8e76a3SSteven Rostedt unsigned long length, 35377a8e76a3SSteven Rostedt void *data) 35387a8e76a3SSteven Rostedt { 35397a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 35407a8e76a3SSteven Rostedt struct ring_buffer_event *event; 35417a8e76a3SSteven Rostedt void *body; 35427a8e76a3SSteven Rostedt int ret = -EBUSY; 35435168ae50SSteven Rostedt int cpu; 35447a8e76a3SSteven Rostedt 35455168ae50SSteven Rostedt preempt_disable_notrace(); 3546bf41a158SSteven Rostedt 354752fbe9cdSLai Jiangshan if (atomic_read(&buffer->record_disabled)) 354852fbe9cdSLai Jiangshan goto out; 354952fbe9cdSLai Jiangshan 35507a8e76a3SSteven Rostedt cpu = raw_smp_processor_id(); 35517a8e76a3SSteven Rostedt 35529e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3553d769041fSSteven Rostedt goto out; 35547a8e76a3SSteven Rostedt 35557a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 35567a8e76a3SSteven Rostedt 35577a8e76a3SSteven Rostedt if (atomic_read(&cpu_buffer->record_disabled)) 35587a8e76a3SSteven Rostedt goto out; 35597a8e76a3SSteven Rostedt 3560be957c44SSteven Rostedt if (length > BUF_MAX_DATA_SIZE) 3561be957c44SSteven Rostedt goto out; 3562be957c44SSteven Rostedt 3563985e871bSSteven Rostedt (Red Hat) if (unlikely(trace_recursive_lock(cpu_buffer))) 3564985e871bSSteven Rostedt (Red Hat) goto out; 3565985e871bSSteven Rostedt (Red Hat) 356662f0b3ebSSteven Rostedt event = rb_reserve_next_event(buffer, cpu_buffer, length); 35677a8e76a3SSteven Rostedt if (!event) 3568985e871bSSteven Rostedt (Red Hat) goto out_unlock; 35697a8e76a3SSteven Rostedt 35707a8e76a3SSteven Rostedt body = rb_event_data(event); 35717a8e76a3SSteven Rostedt 35727a8e76a3SSteven Rostedt memcpy(body, data, length); 35737a8e76a3SSteven Rostedt 35747a8e76a3SSteven Rostedt rb_commit(cpu_buffer, event); 35757a8e76a3SSteven Rostedt 357615693458SSteven Rostedt (Red Hat) rb_wakeups(buffer, cpu_buffer); 357715693458SSteven Rostedt (Red Hat) 35787a8e76a3SSteven Rostedt ret = 0; 3579985e871bSSteven Rostedt (Red Hat) 3580985e871bSSteven Rostedt (Red Hat) out_unlock: 3581985e871bSSteven Rostedt (Red Hat) trace_recursive_unlock(cpu_buffer); 3582985e871bSSteven Rostedt (Red Hat) 35837a8e76a3SSteven Rostedt out: 35845168ae50SSteven Rostedt preempt_enable_notrace(); 35857a8e76a3SSteven Rostedt 35867a8e76a3SSteven Rostedt return ret; 35877a8e76a3SSteven Rostedt } 3588c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_write); 35897a8e76a3SSteven Rostedt 3590da58834cSYaowei Bai static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 3591bf41a158SSteven Rostedt { 3592bf41a158SSteven Rostedt struct buffer_page *reader = cpu_buffer->reader_page; 359377ae365eSSteven Rostedt struct buffer_page *head = rb_set_head_page(cpu_buffer); 3594bf41a158SSteven Rostedt struct buffer_page *commit = cpu_buffer->commit_page; 3595bf41a158SSteven Rostedt 359677ae365eSSteven Rostedt /* In case of error, head will be NULL */ 359777ae365eSSteven Rostedt if (unlikely(!head)) 3598da58834cSYaowei Bai return true; 359977ae365eSSteven Rostedt 3600bf41a158SSteven Rostedt return reader->read == rb_page_commit(reader) && 3601bf41a158SSteven Rostedt (commit == reader || 3602bf41a158SSteven Rostedt (commit == head && 3603bf41a158SSteven Rostedt head->read == rb_page_commit(commit))); 3604bf41a158SSteven Rostedt } 3605bf41a158SSteven Rostedt 36067a8e76a3SSteven Rostedt /** 36077a8e76a3SSteven Rostedt * ring_buffer_record_disable - stop all writes into the buffer 36087a8e76a3SSteven Rostedt * @buffer: The ring buffer to stop writes to. 36097a8e76a3SSteven Rostedt * 36107a8e76a3SSteven Rostedt * This prevents all writes to the buffer. Any attempt to write 36117a8e76a3SSteven Rostedt * to the buffer after this will fail and return NULL. 36127a8e76a3SSteven Rostedt * 361374401729SPaul E. McKenney * The caller should call synchronize_rcu() after this. 36147a8e76a3SSteven Rostedt */ 361513292494SSteven Rostedt (VMware) void ring_buffer_record_disable(struct trace_buffer *buffer) 36167a8e76a3SSteven Rostedt { 36177a8e76a3SSteven Rostedt atomic_inc(&buffer->record_disabled); 36187a8e76a3SSteven Rostedt } 3619c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_disable); 36207a8e76a3SSteven Rostedt 36217a8e76a3SSteven Rostedt /** 36227a8e76a3SSteven Rostedt * ring_buffer_record_enable - enable writes to the buffer 36237a8e76a3SSteven Rostedt * @buffer: The ring buffer to enable writes 36247a8e76a3SSteven Rostedt * 36257a8e76a3SSteven Rostedt * Note, multiple disables will need the same number of enables 3626c41b20e7SAdam Buchbinder * to truly enable the writing (much like preempt_disable). 36277a8e76a3SSteven Rostedt */ 362813292494SSteven Rostedt (VMware) void ring_buffer_record_enable(struct trace_buffer *buffer) 36297a8e76a3SSteven Rostedt { 36307a8e76a3SSteven Rostedt atomic_dec(&buffer->record_disabled); 36317a8e76a3SSteven Rostedt } 3632c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_enable); 36337a8e76a3SSteven Rostedt 36347a8e76a3SSteven Rostedt /** 3635499e5470SSteven Rostedt * ring_buffer_record_off - stop all writes into the buffer 3636499e5470SSteven Rostedt * @buffer: The ring buffer to stop writes to. 3637499e5470SSteven Rostedt * 3638499e5470SSteven Rostedt * This prevents all writes to the buffer. Any attempt to write 3639499e5470SSteven Rostedt * to the buffer after this will fail and return NULL. 3640499e5470SSteven Rostedt * 3641499e5470SSteven Rostedt * This is different than ring_buffer_record_disable() as 364287abb3b1SWang Tianhong * it works like an on/off switch, where as the disable() version 3643499e5470SSteven Rostedt * must be paired with a enable(). 3644499e5470SSteven Rostedt */ 364513292494SSteven Rostedt (VMware) void ring_buffer_record_off(struct trace_buffer *buffer) 3646499e5470SSteven Rostedt { 3647499e5470SSteven Rostedt unsigned int rd; 3648499e5470SSteven Rostedt unsigned int new_rd; 3649499e5470SSteven Rostedt 3650499e5470SSteven Rostedt do { 3651499e5470SSteven Rostedt rd = atomic_read(&buffer->record_disabled); 3652499e5470SSteven Rostedt new_rd = rd | RB_BUFFER_OFF; 3653499e5470SSteven Rostedt } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); 3654499e5470SSteven Rostedt } 3655499e5470SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_record_off); 3656499e5470SSteven Rostedt 3657499e5470SSteven Rostedt /** 3658499e5470SSteven Rostedt * ring_buffer_record_on - restart writes into the buffer 3659499e5470SSteven Rostedt * @buffer: The ring buffer to start writes to. 3660499e5470SSteven Rostedt * 3661499e5470SSteven Rostedt * This enables all writes to the buffer that was disabled by 3662499e5470SSteven Rostedt * ring_buffer_record_off(). 3663499e5470SSteven Rostedt * 3664499e5470SSteven Rostedt * This is different than ring_buffer_record_enable() as 366587abb3b1SWang Tianhong * it works like an on/off switch, where as the enable() version 3666499e5470SSteven Rostedt * must be paired with a disable(). 3667499e5470SSteven Rostedt */ 366813292494SSteven Rostedt (VMware) void ring_buffer_record_on(struct trace_buffer *buffer) 3669499e5470SSteven Rostedt { 3670499e5470SSteven Rostedt unsigned int rd; 3671499e5470SSteven Rostedt unsigned int new_rd; 3672499e5470SSteven Rostedt 3673499e5470SSteven Rostedt do { 3674499e5470SSteven Rostedt rd = atomic_read(&buffer->record_disabled); 3675499e5470SSteven Rostedt new_rd = rd & ~RB_BUFFER_OFF; 3676499e5470SSteven Rostedt } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); 3677499e5470SSteven Rostedt } 3678499e5470SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_record_on); 3679499e5470SSteven Rostedt 3680499e5470SSteven Rostedt /** 3681499e5470SSteven Rostedt * ring_buffer_record_is_on - return true if the ring buffer can write 3682499e5470SSteven Rostedt * @buffer: The ring buffer to see if write is enabled 3683499e5470SSteven Rostedt * 3684499e5470SSteven Rostedt * Returns true if the ring buffer is in a state that it accepts writes. 3685499e5470SSteven Rostedt */ 368613292494SSteven Rostedt (VMware) bool ring_buffer_record_is_on(struct trace_buffer *buffer) 3687499e5470SSteven Rostedt { 3688499e5470SSteven Rostedt return !atomic_read(&buffer->record_disabled); 3689499e5470SSteven Rostedt } 3690499e5470SSteven Rostedt 3691499e5470SSteven Rostedt /** 369273c8d894SMasami Hiramatsu * ring_buffer_record_is_set_on - return true if the ring buffer is set writable 369373c8d894SMasami Hiramatsu * @buffer: The ring buffer to see if write is set enabled 369473c8d894SMasami Hiramatsu * 369573c8d894SMasami Hiramatsu * Returns true if the ring buffer is set writable by ring_buffer_record_on(). 369673c8d894SMasami Hiramatsu * Note that this does NOT mean it is in a writable state. 369773c8d894SMasami Hiramatsu * 369873c8d894SMasami Hiramatsu * It may return true when the ring buffer has been disabled by 369973c8d894SMasami Hiramatsu * ring_buffer_record_disable(), as that is a temporary disabling of 370073c8d894SMasami Hiramatsu * the ring buffer. 370173c8d894SMasami Hiramatsu */ 370213292494SSteven Rostedt (VMware) bool ring_buffer_record_is_set_on(struct trace_buffer *buffer) 370373c8d894SMasami Hiramatsu { 370473c8d894SMasami Hiramatsu return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); 370573c8d894SMasami Hiramatsu } 370673c8d894SMasami Hiramatsu 370773c8d894SMasami Hiramatsu /** 37087a8e76a3SSteven Rostedt * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer 37097a8e76a3SSteven Rostedt * @buffer: The ring buffer to stop writes to. 37107a8e76a3SSteven Rostedt * @cpu: The CPU buffer to stop 37117a8e76a3SSteven Rostedt * 37127a8e76a3SSteven Rostedt * This prevents all writes to the buffer. Any attempt to write 37137a8e76a3SSteven Rostedt * to the buffer after this will fail and return NULL. 37147a8e76a3SSteven Rostedt * 371574401729SPaul E. McKenney * The caller should call synchronize_rcu() after this. 37167a8e76a3SSteven Rostedt */ 371713292494SSteven Rostedt (VMware) void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu) 37187a8e76a3SSteven Rostedt { 37197a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 37207a8e76a3SSteven Rostedt 37219e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer->cpumask)) 37228aabee57SSteven Rostedt return; 37237a8e76a3SSteven Rostedt 37247a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 37257a8e76a3SSteven Rostedt atomic_inc(&cpu_buffer->record_disabled); 37267a8e76a3SSteven Rostedt } 3727c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); 37287a8e76a3SSteven Rostedt 37297a8e76a3SSteven Rostedt /** 37307a8e76a3SSteven Rostedt * ring_buffer_record_enable_cpu - enable writes to the buffer 37317a8e76a3SSteven Rostedt * @buffer: The ring buffer to enable writes 37327a8e76a3SSteven Rostedt * @cpu: The CPU to enable. 37337a8e76a3SSteven Rostedt * 37347a8e76a3SSteven Rostedt * Note, multiple disables will need the same number of enables 3735c41b20e7SAdam Buchbinder * to truly enable the writing (much like preempt_disable). 37367a8e76a3SSteven Rostedt */ 373713292494SSteven Rostedt (VMware) void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu) 37387a8e76a3SSteven Rostedt { 37397a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 37407a8e76a3SSteven Rostedt 37419e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer->cpumask)) 37428aabee57SSteven Rostedt return; 37437a8e76a3SSteven Rostedt 37447a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 37457a8e76a3SSteven Rostedt atomic_dec(&cpu_buffer->record_disabled); 37467a8e76a3SSteven Rostedt } 3747c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 37487a8e76a3SSteven Rostedt 3749f6195aa0SSteven Rostedt /* 3750f6195aa0SSteven Rostedt * The total entries in the ring buffer is the running counter 3751f6195aa0SSteven Rostedt * of entries entered into the ring buffer, minus the sum of 3752f6195aa0SSteven Rostedt * the entries read from the ring buffer and the number of 3753f6195aa0SSteven Rostedt * entries that were overwritten. 3754f6195aa0SSteven Rostedt */ 3755f6195aa0SSteven Rostedt static inline unsigned long 3756f6195aa0SSteven Rostedt rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) 3757f6195aa0SSteven Rostedt { 3758f6195aa0SSteven Rostedt return local_read(&cpu_buffer->entries) - 3759f6195aa0SSteven Rostedt (local_read(&cpu_buffer->overrun) + cpu_buffer->read); 3760f6195aa0SSteven Rostedt } 3761f6195aa0SSteven Rostedt 37627a8e76a3SSteven Rostedt /** 3763c64e148aSVaibhav Nagarnaik * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer 3764c64e148aSVaibhav Nagarnaik * @buffer: The ring buffer 3765c64e148aSVaibhav Nagarnaik * @cpu: The per CPU buffer to read from. 3766c64e148aSVaibhav Nagarnaik */ 376713292494SSteven Rostedt (VMware) u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) 3768c64e148aSVaibhav Nagarnaik { 3769c64e148aSVaibhav Nagarnaik unsigned long flags; 3770c64e148aSVaibhav Nagarnaik struct ring_buffer_per_cpu *cpu_buffer; 3771c64e148aSVaibhav Nagarnaik struct buffer_page *bpage; 3772da830e58SLinus Torvalds u64 ret = 0; 3773c64e148aSVaibhav Nagarnaik 3774c64e148aSVaibhav Nagarnaik if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3775c64e148aSVaibhav Nagarnaik return 0; 3776c64e148aSVaibhav Nagarnaik 3777c64e148aSVaibhav Nagarnaik cpu_buffer = buffer->buffers[cpu]; 37787115e3fcSLinus Torvalds raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3779c64e148aSVaibhav Nagarnaik /* 3780c64e148aSVaibhav Nagarnaik * if the tail is on reader_page, oldest time stamp is on the reader 3781c64e148aSVaibhav Nagarnaik * page 3782c64e148aSVaibhav Nagarnaik */ 3783c64e148aSVaibhav Nagarnaik if (cpu_buffer->tail_page == cpu_buffer->reader_page) 3784c64e148aSVaibhav Nagarnaik bpage = cpu_buffer->reader_page; 3785c64e148aSVaibhav Nagarnaik else 3786c64e148aSVaibhav Nagarnaik bpage = rb_set_head_page(cpu_buffer); 378754f7be5bSSteven Rostedt if (bpage) 3788c64e148aSVaibhav Nagarnaik ret = bpage->page->time_stamp; 37897115e3fcSLinus Torvalds raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3790c64e148aSVaibhav Nagarnaik 3791c64e148aSVaibhav Nagarnaik return ret; 3792c64e148aSVaibhav Nagarnaik } 3793c64e148aSVaibhav Nagarnaik EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); 3794c64e148aSVaibhav Nagarnaik 3795c64e148aSVaibhav Nagarnaik /** 3796c64e148aSVaibhav Nagarnaik * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer 3797c64e148aSVaibhav Nagarnaik * @buffer: The ring buffer 3798c64e148aSVaibhav Nagarnaik * @cpu: The per CPU buffer to read from. 3799c64e148aSVaibhav Nagarnaik */ 380013292494SSteven Rostedt (VMware) unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu) 3801c64e148aSVaibhav Nagarnaik { 3802c64e148aSVaibhav Nagarnaik struct ring_buffer_per_cpu *cpu_buffer; 3803c64e148aSVaibhav Nagarnaik unsigned long ret; 3804c64e148aSVaibhav Nagarnaik 3805c64e148aSVaibhav Nagarnaik if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3806c64e148aSVaibhav Nagarnaik return 0; 3807c64e148aSVaibhav Nagarnaik 3808c64e148aSVaibhav Nagarnaik cpu_buffer = buffer->buffers[cpu]; 3809c64e148aSVaibhav Nagarnaik ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes; 3810c64e148aSVaibhav Nagarnaik 3811c64e148aSVaibhav Nagarnaik return ret; 3812c64e148aSVaibhav Nagarnaik } 3813c64e148aSVaibhav Nagarnaik EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); 3814c64e148aSVaibhav Nagarnaik 3815c64e148aSVaibhav Nagarnaik /** 38167a8e76a3SSteven Rostedt * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 38177a8e76a3SSteven Rostedt * @buffer: The ring buffer 38187a8e76a3SSteven Rostedt * @cpu: The per CPU buffer to get the entries from. 38197a8e76a3SSteven Rostedt */ 382013292494SSteven Rostedt (VMware) unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu) 38217a8e76a3SSteven Rostedt { 38227a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 38237a8e76a3SSteven Rostedt 38249e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer->cpumask)) 38258aabee57SSteven Rostedt return 0; 38267a8e76a3SSteven Rostedt 38277a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 3828554f786eSSteven Rostedt 3829f6195aa0SSteven Rostedt return rb_num_of_entries(cpu_buffer); 38307a8e76a3SSteven Rostedt } 3831c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 38327a8e76a3SSteven Rostedt 38337a8e76a3SSteven Rostedt /** 3834884bfe89SSlava Pestov * ring_buffer_overrun_cpu - get the number of overruns caused by the ring 3835884bfe89SSlava Pestov * buffer wrapping around (only if RB_FL_OVERWRITE is on). 38367a8e76a3SSteven Rostedt * @buffer: The ring buffer 38377a8e76a3SSteven Rostedt * @cpu: The per CPU buffer to get the number of overruns from 38387a8e76a3SSteven Rostedt */ 383913292494SSteven Rostedt (VMware) unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu) 38407a8e76a3SSteven Rostedt { 38417a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 38428aabee57SSteven Rostedt unsigned long ret; 38437a8e76a3SSteven Rostedt 38449e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer->cpumask)) 38458aabee57SSteven Rostedt return 0; 38467a8e76a3SSteven Rostedt 38477a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 384877ae365eSSteven Rostedt ret = local_read(&cpu_buffer->overrun); 3849554f786eSSteven Rostedt 3850554f786eSSteven Rostedt return ret; 38517a8e76a3SSteven Rostedt } 3852c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 38537a8e76a3SSteven Rostedt 38547a8e76a3SSteven Rostedt /** 3855884bfe89SSlava Pestov * ring_buffer_commit_overrun_cpu - get the number of overruns caused by 3856884bfe89SSlava Pestov * commits failing due to the buffer wrapping around while there are uncommitted 3857884bfe89SSlava Pestov * events, such as during an interrupt storm. 3858f0d2c681SSteven Rostedt * @buffer: The ring buffer 3859f0d2c681SSteven Rostedt * @cpu: The per CPU buffer to get the number of overruns from 3860f0d2c681SSteven Rostedt */ 3861f0d2c681SSteven Rostedt unsigned long 386213292494SSteven Rostedt (VMware) ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu) 3863f0d2c681SSteven Rostedt { 3864f0d2c681SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 3865f0d2c681SSteven Rostedt unsigned long ret; 3866f0d2c681SSteven Rostedt 3867f0d2c681SSteven Rostedt if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3868f0d2c681SSteven Rostedt return 0; 3869f0d2c681SSteven Rostedt 3870f0d2c681SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 387177ae365eSSteven Rostedt ret = local_read(&cpu_buffer->commit_overrun); 3872f0d2c681SSteven Rostedt 3873f0d2c681SSteven Rostedt return ret; 3874f0d2c681SSteven Rostedt } 3875f0d2c681SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); 3876f0d2c681SSteven Rostedt 3877f0d2c681SSteven Rostedt /** 3878884bfe89SSlava Pestov * ring_buffer_dropped_events_cpu - get the number of dropped events caused by 3879884bfe89SSlava Pestov * the ring buffer filling up (only if RB_FL_OVERWRITE is off). 3880884bfe89SSlava Pestov * @buffer: The ring buffer 3881884bfe89SSlava Pestov * @cpu: The per CPU buffer to get the number of overruns from 3882884bfe89SSlava Pestov */ 3883884bfe89SSlava Pestov unsigned long 388413292494SSteven Rostedt (VMware) ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu) 3885884bfe89SSlava Pestov { 3886884bfe89SSlava Pestov struct ring_buffer_per_cpu *cpu_buffer; 3887884bfe89SSlava Pestov unsigned long ret; 3888884bfe89SSlava Pestov 3889884bfe89SSlava Pestov if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3890884bfe89SSlava Pestov return 0; 3891884bfe89SSlava Pestov 3892884bfe89SSlava Pestov cpu_buffer = buffer->buffers[cpu]; 3893884bfe89SSlava Pestov ret = local_read(&cpu_buffer->dropped_events); 3894884bfe89SSlava Pestov 3895884bfe89SSlava Pestov return ret; 3896884bfe89SSlava Pestov } 3897884bfe89SSlava Pestov EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); 3898884bfe89SSlava Pestov 3899884bfe89SSlava Pestov /** 3900ad964704SSteven Rostedt (Red Hat) * ring_buffer_read_events_cpu - get the number of events successfully read 3901ad964704SSteven Rostedt (Red Hat) * @buffer: The ring buffer 3902ad964704SSteven Rostedt (Red Hat) * @cpu: The per CPU buffer to get the number of events read 3903ad964704SSteven Rostedt (Red Hat) */ 3904ad964704SSteven Rostedt (Red Hat) unsigned long 390513292494SSteven Rostedt (VMware) ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu) 3906ad964704SSteven Rostedt (Red Hat) { 3907ad964704SSteven Rostedt (Red Hat) struct ring_buffer_per_cpu *cpu_buffer; 3908ad964704SSteven Rostedt (Red Hat) 3909ad964704SSteven Rostedt (Red Hat) if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3910ad964704SSteven Rostedt (Red Hat) return 0; 3911ad964704SSteven Rostedt (Red Hat) 3912ad964704SSteven Rostedt (Red Hat) cpu_buffer = buffer->buffers[cpu]; 3913ad964704SSteven Rostedt (Red Hat) return cpu_buffer->read; 3914ad964704SSteven Rostedt (Red Hat) } 3915ad964704SSteven Rostedt (Red Hat) EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); 3916ad964704SSteven Rostedt (Red Hat) 3917ad964704SSteven Rostedt (Red Hat) /** 39187a8e76a3SSteven Rostedt * ring_buffer_entries - get the number of entries in a buffer 39197a8e76a3SSteven Rostedt * @buffer: The ring buffer 39207a8e76a3SSteven Rostedt * 39217a8e76a3SSteven Rostedt * Returns the total number of entries in the ring buffer 39227a8e76a3SSteven Rostedt * (all CPU entries) 39237a8e76a3SSteven Rostedt */ 392413292494SSteven Rostedt (VMware) unsigned long ring_buffer_entries(struct trace_buffer *buffer) 39257a8e76a3SSteven Rostedt { 39267a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 39277a8e76a3SSteven Rostedt unsigned long entries = 0; 39287a8e76a3SSteven Rostedt int cpu; 39297a8e76a3SSteven Rostedt 39307a8e76a3SSteven Rostedt /* if you care about this being correct, lock the buffer */ 39317a8e76a3SSteven Rostedt for_each_buffer_cpu(buffer, cpu) { 39327a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 3933f6195aa0SSteven Rostedt entries += rb_num_of_entries(cpu_buffer); 39347a8e76a3SSteven Rostedt } 39357a8e76a3SSteven Rostedt 39367a8e76a3SSteven Rostedt return entries; 39377a8e76a3SSteven Rostedt } 3938c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_entries); 39397a8e76a3SSteven Rostedt 39407a8e76a3SSteven Rostedt /** 394167b394f7SJiri Olsa * ring_buffer_overruns - get the number of overruns in buffer 39427a8e76a3SSteven Rostedt * @buffer: The ring buffer 39437a8e76a3SSteven Rostedt * 39447a8e76a3SSteven Rostedt * Returns the total number of overruns in the ring buffer 39457a8e76a3SSteven Rostedt * (all CPU entries) 39467a8e76a3SSteven Rostedt */ 394713292494SSteven Rostedt (VMware) unsigned long ring_buffer_overruns(struct trace_buffer *buffer) 39487a8e76a3SSteven Rostedt { 39497a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 39507a8e76a3SSteven Rostedt unsigned long overruns = 0; 39517a8e76a3SSteven Rostedt int cpu; 39527a8e76a3SSteven Rostedt 39537a8e76a3SSteven Rostedt /* if you care about this being correct, lock the buffer */ 39547a8e76a3SSteven Rostedt for_each_buffer_cpu(buffer, cpu) { 39557a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 395677ae365eSSteven Rostedt overruns += local_read(&cpu_buffer->overrun); 39577a8e76a3SSteven Rostedt } 39587a8e76a3SSteven Rostedt 39597a8e76a3SSteven Rostedt return overruns; 39607a8e76a3SSteven Rostedt } 3961c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_overruns); 39627a8e76a3SSteven Rostedt 3963642edba5SSteven Rostedt static void rb_iter_reset(struct ring_buffer_iter *iter) 39647a8e76a3SSteven Rostedt { 39657a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 39667a8e76a3SSteven Rostedt 3967d769041fSSteven Rostedt /* Iterator usage is expected to have record disabled */ 3968d769041fSSteven Rostedt iter->head_page = cpu_buffer->reader_page; 39696f807acdSSteven Rostedt iter->head = cpu_buffer->reader_page->read; 3970785888c5SSteven Rostedt (VMware) iter->next_event = iter->head; 3971651e22f2SSteven Rostedt (Red Hat) 3972651e22f2SSteven Rostedt (Red Hat) iter->cache_reader_page = iter->head_page; 397324607f11SSteven Rostedt (Red Hat) iter->cache_read = cpu_buffer->read; 3974651e22f2SSteven Rostedt (Red Hat) 397528e3fc56SSteven Rostedt (VMware) if (iter->head) { 3976d769041fSSteven Rostedt iter->read_stamp = cpu_buffer->read_stamp; 397728e3fc56SSteven Rostedt (VMware) iter->page_stamp = cpu_buffer->reader_page->page->time_stamp; 397828e3fc56SSteven Rostedt (VMware) } else { 3979abc9b56dSSteven Rostedt iter->read_stamp = iter->head_page->page->time_stamp; 398028e3fc56SSteven Rostedt (VMware) iter->page_stamp = iter->read_stamp; 398128e3fc56SSteven Rostedt (VMware) } 3982642edba5SSteven Rostedt } 3983f83c9d0fSSteven Rostedt 3984642edba5SSteven Rostedt /** 3985642edba5SSteven Rostedt * ring_buffer_iter_reset - reset an iterator 3986642edba5SSteven Rostedt * @iter: The iterator to reset 3987642edba5SSteven Rostedt * 3988642edba5SSteven Rostedt * Resets the iterator, so that it will start from the beginning 3989642edba5SSteven Rostedt * again. 3990642edba5SSteven Rostedt */ 3991642edba5SSteven Rostedt void ring_buffer_iter_reset(struct ring_buffer_iter *iter) 3992642edba5SSteven Rostedt { 3993554f786eSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 3994642edba5SSteven Rostedt unsigned long flags; 3995642edba5SSteven Rostedt 3996554f786eSSteven Rostedt if (!iter) 3997554f786eSSteven Rostedt return; 3998554f786eSSteven Rostedt 3999554f786eSSteven Rostedt cpu_buffer = iter->cpu_buffer; 4000554f786eSSteven Rostedt 40015389f6faSThomas Gleixner raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 4002642edba5SSteven Rostedt rb_iter_reset(iter); 40035389f6faSThomas Gleixner raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 40047a8e76a3SSteven Rostedt } 4005c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); 40067a8e76a3SSteven Rostedt 40077a8e76a3SSteven Rostedt /** 40087a8e76a3SSteven Rostedt * ring_buffer_iter_empty - check if an iterator has no more to read 40097a8e76a3SSteven Rostedt * @iter: The iterator to check 40107a8e76a3SSteven Rostedt */ 40117a8e76a3SSteven Rostedt int ring_buffer_iter_empty(struct ring_buffer_iter *iter) 40127a8e76a3SSteven Rostedt { 40137a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 401478f7a45dSSteven Rostedt (VMware) struct buffer_page *reader; 401578f7a45dSSteven Rostedt (VMware) struct buffer_page *head_page; 401678f7a45dSSteven Rostedt (VMware) struct buffer_page *commit_page; 4017ead6ecfdSSteven Rostedt (VMware) struct buffer_page *curr_commit_page; 401878f7a45dSSteven Rostedt (VMware) unsigned commit; 4019ead6ecfdSSteven Rostedt (VMware) u64 curr_commit_ts; 4020ead6ecfdSSteven Rostedt (VMware) u64 commit_ts; 40217a8e76a3SSteven Rostedt 40227a8e76a3SSteven Rostedt cpu_buffer = iter->cpu_buffer; 402378f7a45dSSteven Rostedt (VMware) reader = cpu_buffer->reader_page; 402478f7a45dSSteven Rostedt (VMware) head_page = cpu_buffer->head_page; 402578f7a45dSSteven Rostedt (VMware) commit_page = cpu_buffer->commit_page; 4026ead6ecfdSSteven Rostedt (VMware) commit_ts = commit_page->page->time_stamp; 402778f7a45dSSteven Rostedt (VMware) 4028ead6ecfdSSteven Rostedt (VMware) /* 4029ead6ecfdSSteven Rostedt (VMware) * When the writer goes across pages, it issues a cmpxchg which 4030ead6ecfdSSteven Rostedt (VMware) * is a mb(), which will synchronize with the rmb here. 4031ead6ecfdSSteven Rostedt (VMware) * (see rb_tail_page_update()) 4032ead6ecfdSSteven Rostedt (VMware) */ 4033ead6ecfdSSteven Rostedt (VMware) smp_rmb(); 4034ead6ecfdSSteven Rostedt (VMware) commit = rb_page_commit(commit_page); 4035ead6ecfdSSteven Rostedt (VMware) /* We want to make sure that the commit page doesn't change */ 4036ead6ecfdSSteven Rostedt (VMware) smp_rmb(); 4037ead6ecfdSSteven Rostedt (VMware) 4038ead6ecfdSSteven Rostedt (VMware) /* Make sure commit page didn't change */ 4039ead6ecfdSSteven Rostedt (VMware) curr_commit_page = READ_ONCE(cpu_buffer->commit_page); 4040ead6ecfdSSteven Rostedt (VMware) curr_commit_ts = READ_ONCE(curr_commit_page->page->time_stamp); 4041ead6ecfdSSteven Rostedt (VMware) 4042ead6ecfdSSteven Rostedt (VMware) /* If the commit page changed, then there's more data */ 4043ead6ecfdSSteven Rostedt (VMware) if (curr_commit_page != commit_page || 4044ead6ecfdSSteven Rostedt (VMware) curr_commit_ts != commit_ts) 4045ead6ecfdSSteven Rostedt (VMware) return 0; 4046ead6ecfdSSteven Rostedt (VMware) 4047ead6ecfdSSteven Rostedt (VMware) /* Still racy, as it may return a false positive, but that's OK */ 4048785888c5SSteven Rostedt (VMware) return ((iter->head_page == commit_page && iter->head >= commit) || 404978f7a45dSSteven Rostedt (VMware) (iter->head_page == reader && commit_page == head_page && 405078f7a45dSSteven Rostedt (VMware) head_page->read == commit && 405178f7a45dSSteven Rostedt (VMware) iter->head == rb_page_commit(cpu_buffer->reader_page))); 40527a8e76a3SSteven Rostedt } 4053c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); 40547a8e76a3SSteven Rostedt 40557a8e76a3SSteven Rostedt static void 40567a8e76a3SSteven Rostedt rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, 40577a8e76a3SSteven Rostedt struct ring_buffer_event *event) 40587a8e76a3SSteven Rostedt { 40597a8e76a3SSteven Rostedt u64 delta; 40607a8e76a3SSteven Rostedt 4061334d4169SLai Jiangshan switch (event->type_len) { 40627a8e76a3SSteven Rostedt case RINGBUF_TYPE_PADDING: 40637a8e76a3SSteven Rostedt return; 40647a8e76a3SSteven Rostedt 40657a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_EXTEND: 4066dc4e2801STom Zanussi delta = ring_buffer_event_time_stamp(event); 40677a8e76a3SSteven Rostedt cpu_buffer->read_stamp += delta; 40687a8e76a3SSteven Rostedt return; 40697a8e76a3SSteven Rostedt 40707a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_STAMP: 4071dc4e2801STom Zanussi delta = ring_buffer_event_time_stamp(event); 4072dc4e2801STom Zanussi cpu_buffer->read_stamp = delta; 40737a8e76a3SSteven Rostedt return; 40747a8e76a3SSteven Rostedt 40757a8e76a3SSteven Rostedt case RINGBUF_TYPE_DATA: 40767a8e76a3SSteven Rostedt cpu_buffer->read_stamp += event->time_delta; 40777a8e76a3SSteven Rostedt return; 40787a8e76a3SSteven Rostedt 40797a8e76a3SSteven Rostedt default: 4080da4d401aSSteven Rostedt (VMware) RB_WARN_ON(cpu_buffer, 1); 40817a8e76a3SSteven Rostedt } 40827a8e76a3SSteven Rostedt return; 40837a8e76a3SSteven Rostedt } 40847a8e76a3SSteven Rostedt 40857a8e76a3SSteven Rostedt static void 40867a8e76a3SSteven Rostedt rb_update_iter_read_stamp(struct ring_buffer_iter *iter, 40877a8e76a3SSteven Rostedt struct ring_buffer_event *event) 40887a8e76a3SSteven Rostedt { 40897a8e76a3SSteven Rostedt u64 delta; 40907a8e76a3SSteven Rostedt 4091334d4169SLai Jiangshan switch (event->type_len) { 40927a8e76a3SSteven Rostedt case RINGBUF_TYPE_PADDING: 40937a8e76a3SSteven Rostedt return; 40947a8e76a3SSteven Rostedt 40957a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_EXTEND: 4096dc4e2801STom Zanussi delta = ring_buffer_event_time_stamp(event); 40977a8e76a3SSteven Rostedt iter->read_stamp += delta; 40987a8e76a3SSteven Rostedt return; 40997a8e76a3SSteven Rostedt 41007a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_STAMP: 4101dc4e2801STom Zanussi delta = ring_buffer_event_time_stamp(event); 4102dc4e2801STom Zanussi iter->read_stamp = delta; 41037a8e76a3SSteven Rostedt return; 41047a8e76a3SSteven Rostedt 41057a8e76a3SSteven Rostedt case RINGBUF_TYPE_DATA: 41067a8e76a3SSteven Rostedt iter->read_stamp += event->time_delta; 41077a8e76a3SSteven Rostedt return; 41087a8e76a3SSteven Rostedt 41097a8e76a3SSteven Rostedt default: 4110da4d401aSSteven Rostedt (VMware) RB_WARN_ON(iter->cpu_buffer, 1); 41117a8e76a3SSteven Rostedt } 41127a8e76a3SSteven Rostedt return; 41137a8e76a3SSteven Rostedt } 41147a8e76a3SSteven Rostedt 4115d769041fSSteven Rostedt static struct buffer_page * 4116d769041fSSteven Rostedt rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 41177a8e76a3SSteven Rostedt { 4118d769041fSSteven Rostedt struct buffer_page *reader = NULL; 411966a8cb95SSteven Rostedt unsigned long overwrite; 4120d769041fSSteven Rostedt unsigned long flags; 4121818e3dd3SSteven Rostedt int nr_loops = 0; 412277ae365eSSteven Rostedt int ret; 4123d769041fSSteven Rostedt 41243e03fb7fSSteven Rostedt local_irq_save(flags); 41250199c4e6SThomas Gleixner arch_spin_lock(&cpu_buffer->lock); 4126d769041fSSteven Rostedt 4127d769041fSSteven Rostedt again: 4128818e3dd3SSteven Rostedt /* 4129818e3dd3SSteven Rostedt * This should normally only loop twice. But because the 4130818e3dd3SSteven Rostedt * start of the reader inserts an empty page, it causes 4131818e3dd3SSteven Rostedt * a case where we will loop three times. There should be no 4132818e3dd3SSteven Rostedt * reason to loop four times (that I know of). 4133818e3dd3SSteven Rostedt */ 41343e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) { 4135818e3dd3SSteven Rostedt reader = NULL; 4136818e3dd3SSteven Rostedt goto out; 4137818e3dd3SSteven Rostedt } 4138818e3dd3SSteven Rostedt 4139d769041fSSteven Rostedt reader = cpu_buffer->reader_page; 4140d769041fSSteven Rostedt 4141d769041fSSteven Rostedt /* If there's more to read, return this page */ 4142bf41a158SSteven Rostedt if (cpu_buffer->reader_page->read < rb_page_size(reader)) 4143d769041fSSteven Rostedt goto out; 4144d769041fSSteven Rostedt 4145d769041fSSteven Rostedt /* Never should we have an index greater than the size */ 41463e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, 41473e89c7bbSSteven Rostedt cpu_buffer->reader_page->read > rb_page_size(reader))) 41483e89c7bbSSteven Rostedt goto out; 4149d769041fSSteven Rostedt 4150d769041fSSteven Rostedt /* check if we caught up to the tail */ 4151d769041fSSteven Rostedt reader = NULL; 4152bf41a158SSteven Rostedt if (cpu_buffer->commit_page == cpu_buffer->reader_page) 4153d769041fSSteven Rostedt goto out; 41547a8e76a3SSteven Rostedt 4155a5fb8331SSteven Rostedt /* Don't bother swapping if the ring buffer is empty */ 4156a5fb8331SSteven Rostedt if (rb_num_of_entries(cpu_buffer) == 0) 4157a5fb8331SSteven Rostedt goto out; 4158a5fb8331SSteven Rostedt 41597a8e76a3SSteven Rostedt /* 4160d769041fSSteven Rostedt * Reset the reader page to size zero. 41617a8e76a3SSteven Rostedt */ 416277ae365eSSteven Rostedt local_set(&cpu_buffer->reader_page->write, 0); 416377ae365eSSteven Rostedt local_set(&cpu_buffer->reader_page->entries, 0); 416477ae365eSSteven Rostedt local_set(&cpu_buffer->reader_page->page->commit, 0); 4165ff0ff84aSSteven Rostedt cpu_buffer->reader_page->real_end = 0; 4166d769041fSSteven Rostedt 416777ae365eSSteven Rostedt spin: 416877ae365eSSteven Rostedt /* 416977ae365eSSteven Rostedt * Splice the empty reader page into the list around the head. 417077ae365eSSteven Rostedt */ 417177ae365eSSteven Rostedt reader = rb_set_head_page(cpu_buffer); 417254f7be5bSSteven Rostedt if (!reader) 417354f7be5bSSteven Rostedt goto out; 41740e1ff5d7SSteven Rostedt cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); 4175d769041fSSteven Rostedt cpu_buffer->reader_page->list.prev = reader->list.prev; 4176bf41a158SSteven Rostedt 41773adc54faSSteven Rostedt /* 41783adc54faSSteven Rostedt * cpu_buffer->pages just needs to point to the buffer, it 41793adc54faSSteven Rostedt * has no specific buffer page to point to. Lets move it out 418025985edcSLucas De Marchi * of our way so we don't accidentally swap it. 41813adc54faSSteven Rostedt */ 41823adc54faSSteven Rostedt cpu_buffer->pages = reader->list.prev; 41833adc54faSSteven Rostedt 418477ae365eSSteven Rostedt /* The reader page will be pointing to the new head */ 418577ae365eSSteven Rostedt rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); 4186d769041fSSteven Rostedt 4187d769041fSSteven Rostedt /* 418866a8cb95SSteven Rostedt * We want to make sure we read the overruns after we set up our 418966a8cb95SSteven Rostedt * pointers to the next object. The writer side does a 419066a8cb95SSteven Rostedt * cmpxchg to cross pages which acts as the mb on the writer 419166a8cb95SSteven Rostedt * side. Note, the reader will constantly fail the swap 419266a8cb95SSteven Rostedt * while the writer is updating the pointers, so this 419366a8cb95SSteven Rostedt * guarantees that the overwrite recorded here is the one we 419466a8cb95SSteven Rostedt * want to compare with the last_overrun. 419566a8cb95SSteven Rostedt */ 419666a8cb95SSteven Rostedt smp_mb(); 419766a8cb95SSteven Rostedt overwrite = local_read(&(cpu_buffer->overrun)); 419866a8cb95SSteven Rostedt 419966a8cb95SSteven Rostedt /* 420077ae365eSSteven Rostedt * Here's the tricky part. 420177ae365eSSteven Rostedt * 420277ae365eSSteven Rostedt * We need to move the pointer past the header page. 420377ae365eSSteven Rostedt * But we can only do that if a writer is not currently 420477ae365eSSteven Rostedt * moving it. The page before the header page has the 420577ae365eSSteven Rostedt * flag bit '1' set if it is pointing to the page we want. 420677ae365eSSteven Rostedt * but if the writer is in the process of moving it 420777ae365eSSteven Rostedt * than it will be '2' or already moved '0'. 4208d769041fSSteven Rostedt */ 4209d769041fSSteven Rostedt 421077ae365eSSteven Rostedt ret = rb_head_page_replace(reader, cpu_buffer->reader_page); 421177ae365eSSteven Rostedt 421277ae365eSSteven Rostedt /* 421377ae365eSSteven Rostedt * If we did not convert it, then we must try again. 421477ae365eSSteven Rostedt */ 421577ae365eSSteven Rostedt if (!ret) 421677ae365eSSteven Rostedt goto spin; 421777ae365eSSteven Rostedt 421877ae365eSSteven Rostedt /* 42192c2b0a78SSteven Rostedt (VMware) * Yay! We succeeded in replacing the page. 422077ae365eSSteven Rostedt * 422177ae365eSSteven Rostedt * Now make the new head point back to the reader page. 422277ae365eSSteven Rostedt */ 42235ded3dc6SDavid Sharp rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; 42247a8e76a3SSteven Rostedt rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 4225d769041fSSteven Rostedt 42262c2b0a78SSteven Rostedt (VMware) local_inc(&cpu_buffer->pages_read); 42272c2b0a78SSteven Rostedt (VMware) 4228d769041fSSteven Rostedt /* Finally update the reader page to the new head */ 4229d769041fSSteven Rostedt cpu_buffer->reader_page = reader; 4230b81f472aSSteven Rostedt (Red Hat) cpu_buffer->reader_page->read = 0; 4231d769041fSSteven Rostedt 423266a8cb95SSteven Rostedt if (overwrite != cpu_buffer->last_overrun) { 423366a8cb95SSteven Rostedt cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; 423466a8cb95SSteven Rostedt cpu_buffer->last_overrun = overwrite; 423566a8cb95SSteven Rostedt } 423666a8cb95SSteven Rostedt 4237d769041fSSteven Rostedt goto again; 4238d769041fSSteven Rostedt 4239d769041fSSteven Rostedt out: 4240b81f472aSSteven Rostedt (Red Hat) /* Update the read_stamp on the first event */ 4241b81f472aSSteven Rostedt (Red Hat) if (reader && reader->read == 0) 4242b81f472aSSteven Rostedt (Red Hat) cpu_buffer->read_stamp = reader->page->time_stamp; 4243b81f472aSSteven Rostedt (Red Hat) 42440199c4e6SThomas Gleixner arch_spin_unlock(&cpu_buffer->lock); 42453e03fb7fSSteven Rostedt local_irq_restore(flags); 4246d769041fSSteven Rostedt 4247d769041fSSteven Rostedt return reader; 42487a8e76a3SSteven Rostedt } 42497a8e76a3SSteven Rostedt 4250d769041fSSteven Rostedt static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) 4251d769041fSSteven Rostedt { 4252d769041fSSteven Rostedt struct ring_buffer_event *event; 4253d769041fSSteven Rostedt struct buffer_page *reader; 4254d769041fSSteven Rostedt unsigned length; 4255d769041fSSteven Rostedt 4256d769041fSSteven Rostedt reader = rb_get_reader_page(cpu_buffer); 4257d769041fSSteven Rostedt 4258d769041fSSteven Rostedt /* This function should not be called when buffer is empty */ 42593e89c7bbSSteven Rostedt if (RB_WARN_ON(cpu_buffer, !reader)) 42603e89c7bbSSteven Rostedt return; 4261d769041fSSteven Rostedt 4262d769041fSSteven Rostedt event = rb_reader_event(cpu_buffer); 42637a8e76a3SSteven Rostedt 4264a1863c21SSteven Rostedt if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 4265e4906effSSteven Rostedt cpu_buffer->read++; 42667a8e76a3SSteven Rostedt 42677a8e76a3SSteven Rostedt rb_update_read_stamp(cpu_buffer, event); 42687a8e76a3SSteven Rostedt 4269d769041fSSteven Rostedt length = rb_event_length(event); 42706f807acdSSteven Rostedt cpu_buffer->reader_page->read += length; 42717a8e76a3SSteven Rostedt } 42727a8e76a3SSteven Rostedt 42737a8e76a3SSteven Rostedt static void rb_advance_iter(struct ring_buffer_iter *iter) 42747a8e76a3SSteven Rostedt { 42757a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 42767a8e76a3SSteven Rostedt 42777a8e76a3SSteven Rostedt cpu_buffer = iter->cpu_buffer; 42787a8e76a3SSteven Rostedt 4279785888c5SSteven Rostedt (VMware) /* If head == next_event then we need to jump to the next event */ 4280785888c5SSteven Rostedt (VMware) if (iter->head == iter->next_event) { 4281785888c5SSteven Rostedt (VMware) /* If the event gets overwritten again, there's nothing to do */ 4282785888c5SSteven Rostedt (VMware) if (rb_iter_head_event(iter) == NULL) 4283785888c5SSteven Rostedt (VMware) return; 4284785888c5SSteven Rostedt (VMware) } 4285785888c5SSteven Rostedt (VMware) 4286785888c5SSteven Rostedt (VMware) iter->head = iter->next_event; 4287785888c5SSteven Rostedt (VMware) 42887a8e76a3SSteven Rostedt /* 42897a8e76a3SSteven Rostedt * Check if we are at the end of the buffer. 42907a8e76a3SSteven Rostedt */ 4291785888c5SSteven Rostedt (VMware) if (iter->next_event >= rb_page_size(iter->head_page)) { 4292ea05b57cSSteven Rostedt /* discarded commits can make the page empty */ 4293ea05b57cSSteven Rostedt if (iter->head_page == cpu_buffer->commit_page) 42943e89c7bbSSteven Rostedt return; 4295d769041fSSteven Rostedt rb_inc_iter(iter); 42967a8e76a3SSteven Rostedt return; 42977a8e76a3SSteven Rostedt } 42987a8e76a3SSteven Rostedt 4299785888c5SSteven Rostedt (VMware) rb_update_iter_read_stamp(iter, iter->event); 43007a8e76a3SSteven Rostedt } 43017a8e76a3SSteven Rostedt 430266a8cb95SSteven Rostedt static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) 430366a8cb95SSteven Rostedt { 430466a8cb95SSteven Rostedt return cpu_buffer->lost_events; 430566a8cb95SSteven Rostedt } 430666a8cb95SSteven Rostedt 4307f83c9d0fSSteven Rostedt static struct ring_buffer_event * 430866a8cb95SSteven Rostedt rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, 430966a8cb95SSteven Rostedt unsigned long *lost_events) 43107a8e76a3SSteven Rostedt { 43117a8e76a3SSteven Rostedt struct ring_buffer_event *event; 4312d769041fSSteven Rostedt struct buffer_page *reader; 4313818e3dd3SSteven Rostedt int nr_loops = 0; 43147a8e76a3SSteven Rostedt 4315dc4e2801STom Zanussi if (ts) 4316dc4e2801STom Zanussi *ts = 0; 43177a8e76a3SSteven Rostedt again: 4318818e3dd3SSteven Rostedt /* 431969d1b839SSteven Rostedt * We repeat when a time extend is encountered. 432069d1b839SSteven Rostedt * Since the time extend is always attached to a data event, 432169d1b839SSteven Rostedt * we should never loop more than once. 432269d1b839SSteven Rostedt * (We never hit the following condition more than twice). 4323818e3dd3SSteven Rostedt */ 432469d1b839SSteven Rostedt if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) 4325818e3dd3SSteven Rostedt return NULL; 4326818e3dd3SSteven Rostedt 4327d769041fSSteven Rostedt reader = rb_get_reader_page(cpu_buffer); 4328d769041fSSteven Rostedt if (!reader) 43297a8e76a3SSteven Rostedt return NULL; 43307a8e76a3SSteven Rostedt 4331d769041fSSteven Rostedt event = rb_reader_event(cpu_buffer); 43327a8e76a3SSteven Rostedt 4333334d4169SLai Jiangshan switch (event->type_len) { 43347a8e76a3SSteven Rostedt case RINGBUF_TYPE_PADDING: 43352d622719STom Zanussi if (rb_null_event(event)) 4336bf41a158SSteven Rostedt RB_WARN_ON(cpu_buffer, 1); 43372d622719STom Zanussi /* 43382d622719STom Zanussi * Because the writer could be discarding every 43392d622719STom Zanussi * event it creates (which would probably be bad) 43402d622719STom Zanussi * if we were to go back to "again" then we may never 43412d622719STom Zanussi * catch up, and will trigger the warn on, or lock 43422d622719STom Zanussi * the box. Return the padding, and we will release 43432d622719STom Zanussi * the current locks, and try again. 43442d622719STom Zanussi */ 43452d622719STom Zanussi return event; 43467a8e76a3SSteven Rostedt 43477a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_EXTEND: 43487a8e76a3SSteven Rostedt /* Internal data, OK to advance */ 4349d769041fSSteven Rostedt rb_advance_reader(cpu_buffer); 43507a8e76a3SSteven Rostedt goto again; 43517a8e76a3SSteven Rostedt 43527a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_STAMP: 4353dc4e2801STom Zanussi if (ts) { 4354dc4e2801STom Zanussi *ts = ring_buffer_event_time_stamp(event); 4355dc4e2801STom Zanussi ring_buffer_normalize_time_stamp(cpu_buffer->buffer, 4356dc4e2801STom Zanussi cpu_buffer->cpu, ts); 4357dc4e2801STom Zanussi } 4358dc4e2801STom Zanussi /* Internal data, OK to advance */ 4359d769041fSSteven Rostedt rb_advance_reader(cpu_buffer); 43607a8e76a3SSteven Rostedt goto again; 43617a8e76a3SSteven Rostedt 43627a8e76a3SSteven Rostedt case RINGBUF_TYPE_DATA: 4363dc4e2801STom Zanussi if (ts && !(*ts)) { 43647a8e76a3SSteven Rostedt *ts = cpu_buffer->read_stamp + event->time_delta; 4365d8eeb2d3SRobert Richter ring_buffer_normalize_time_stamp(cpu_buffer->buffer, 436637886f6aSSteven Rostedt cpu_buffer->cpu, ts); 43677a8e76a3SSteven Rostedt } 436866a8cb95SSteven Rostedt if (lost_events) 436966a8cb95SSteven Rostedt *lost_events = rb_lost_events(cpu_buffer); 43707a8e76a3SSteven Rostedt return event; 43717a8e76a3SSteven Rostedt 43727a8e76a3SSteven Rostedt default: 4373da4d401aSSteven Rostedt (VMware) RB_WARN_ON(cpu_buffer, 1); 43747a8e76a3SSteven Rostedt } 43757a8e76a3SSteven Rostedt 43767a8e76a3SSteven Rostedt return NULL; 43777a8e76a3SSteven Rostedt } 4378c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_peek); 43797a8e76a3SSteven Rostedt 4380f83c9d0fSSteven Rostedt static struct ring_buffer_event * 4381f83c9d0fSSteven Rostedt rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) 43827a8e76a3SSteven Rostedt { 438313292494SSteven Rostedt (VMware) struct trace_buffer *buffer; 43847a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 43857a8e76a3SSteven Rostedt struct ring_buffer_event *event; 4386818e3dd3SSteven Rostedt int nr_loops = 0; 43877a8e76a3SSteven Rostedt 4388dc4e2801STom Zanussi if (ts) 4389dc4e2801STom Zanussi *ts = 0; 4390dc4e2801STom Zanussi 43917a8e76a3SSteven Rostedt cpu_buffer = iter->cpu_buffer; 43927a8e76a3SSteven Rostedt buffer = cpu_buffer->buffer; 43937a8e76a3SSteven Rostedt 4394492a74f4SSteven Rostedt /* 4395492a74f4SSteven Rostedt * Check if someone performed a consuming read to 4396492a74f4SSteven Rostedt * the buffer. A consuming read invalidates the iterator 4397492a74f4SSteven Rostedt * and we need to reset the iterator in this case. 4398492a74f4SSteven Rostedt */ 4399492a74f4SSteven Rostedt if (unlikely(iter->cache_read != cpu_buffer->read || 4400492a74f4SSteven Rostedt iter->cache_reader_page != cpu_buffer->reader_page)) 4401492a74f4SSteven Rostedt rb_iter_reset(iter); 4402492a74f4SSteven Rostedt 44037a8e76a3SSteven Rostedt again: 44043c05d748SSteven Rostedt if (ring_buffer_iter_empty(iter)) 44053c05d748SSteven Rostedt return NULL; 44063c05d748SSteven Rostedt 4407818e3dd3SSteven Rostedt /* 44083d2353deSSteven Rostedt (VMware) * As the writer can mess with what the iterator is trying 44093d2353deSSteven Rostedt (VMware) * to read, just give up if we fail to get an event after 44103d2353deSSteven Rostedt (VMware) * three tries. The iterator is not as reliable when reading 44113d2353deSSteven Rostedt (VMware) * the ring buffer with an active write as the consumer is. 44123d2353deSSteven Rostedt (VMware) * Do not warn if the three failures is reached. 4413818e3dd3SSteven Rostedt */ 44143d2353deSSteven Rostedt (VMware) if (++nr_loops > 3) 4415818e3dd3SSteven Rostedt return NULL; 4416818e3dd3SSteven Rostedt 44177a8e76a3SSteven Rostedt if (rb_per_cpu_empty(cpu_buffer)) 44187a8e76a3SSteven Rostedt return NULL; 44197a8e76a3SSteven Rostedt 442010e83fd0SSteven Rostedt (Red Hat) if (iter->head >= rb_page_size(iter->head_page)) { 44213c05d748SSteven Rostedt rb_inc_iter(iter); 44223c05d748SSteven Rostedt goto again; 44233c05d748SSteven Rostedt } 44243c05d748SSteven Rostedt 44257a8e76a3SSteven Rostedt event = rb_iter_head_event(iter); 44263d2353deSSteven Rostedt (VMware) if (!event) 4427785888c5SSteven Rostedt (VMware) goto again; 44287a8e76a3SSteven Rostedt 4429334d4169SLai Jiangshan switch (event->type_len) { 44307a8e76a3SSteven Rostedt case RINGBUF_TYPE_PADDING: 44312d622719STom Zanussi if (rb_null_event(event)) { 4432d769041fSSteven Rostedt rb_inc_iter(iter); 44337a8e76a3SSteven Rostedt goto again; 44342d622719STom Zanussi } 44352d622719STom Zanussi rb_advance_iter(iter); 44362d622719STom Zanussi return event; 44377a8e76a3SSteven Rostedt 44387a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_EXTEND: 44397a8e76a3SSteven Rostedt /* Internal data, OK to advance */ 44407a8e76a3SSteven Rostedt rb_advance_iter(iter); 44417a8e76a3SSteven Rostedt goto again; 44427a8e76a3SSteven Rostedt 44437a8e76a3SSteven Rostedt case RINGBUF_TYPE_TIME_STAMP: 4444dc4e2801STom Zanussi if (ts) { 4445dc4e2801STom Zanussi *ts = ring_buffer_event_time_stamp(event); 4446dc4e2801STom Zanussi ring_buffer_normalize_time_stamp(cpu_buffer->buffer, 4447dc4e2801STom Zanussi cpu_buffer->cpu, ts); 4448dc4e2801STom Zanussi } 4449dc4e2801STom Zanussi /* Internal data, OK to advance */ 44507a8e76a3SSteven Rostedt rb_advance_iter(iter); 44517a8e76a3SSteven Rostedt goto again; 44527a8e76a3SSteven Rostedt 44537a8e76a3SSteven Rostedt case RINGBUF_TYPE_DATA: 4454dc4e2801STom Zanussi if (ts && !(*ts)) { 44557a8e76a3SSteven Rostedt *ts = iter->read_stamp + event->time_delta; 445637886f6aSSteven Rostedt ring_buffer_normalize_time_stamp(buffer, 445737886f6aSSteven Rostedt cpu_buffer->cpu, ts); 44587a8e76a3SSteven Rostedt } 44597a8e76a3SSteven Rostedt return event; 44607a8e76a3SSteven Rostedt 44617a8e76a3SSteven Rostedt default: 4462da4d401aSSteven Rostedt (VMware) RB_WARN_ON(cpu_buffer, 1); 44637a8e76a3SSteven Rostedt } 44647a8e76a3SSteven Rostedt 44657a8e76a3SSteven Rostedt return NULL; 44667a8e76a3SSteven Rostedt } 4467c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); 44687a8e76a3SSteven Rostedt 4469289a5a25SSteven Rostedt (Red Hat) static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer) 44708d707e8eSSteven Rostedt { 4471289a5a25SSteven Rostedt (Red Hat) if (likely(!in_nmi())) { 4472289a5a25SSteven Rostedt (Red Hat) raw_spin_lock(&cpu_buffer->reader_lock); 4473289a5a25SSteven Rostedt (Red Hat) return true; 4474289a5a25SSteven Rostedt (Red Hat) } 4475289a5a25SSteven Rostedt (Red Hat) 44768d707e8eSSteven Rostedt /* 44778d707e8eSSteven Rostedt * If an NMI die dumps out the content of the ring buffer 4478289a5a25SSteven Rostedt (Red Hat) * trylock must be used to prevent a deadlock if the NMI 4479289a5a25SSteven Rostedt (Red Hat) * preempted a task that holds the ring buffer locks. If 4480289a5a25SSteven Rostedt (Red Hat) * we get the lock then all is fine, if not, then continue 4481289a5a25SSteven Rostedt (Red Hat) * to do the read, but this can corrupt the ring buffer, 4482289a5a25SSteven Rostedt (Red Hat) * so it must be permanently disabled from future writes. 4483289a5a25SSteven Rostedt (Red Hat) * Reading from NMI is a oneshot deal. 44848d707e8eSSteven Rostedt */ 4485289a5a25SSteven Rostedt (Red Hat) if (raw_spin_trylock(&cpu_buffer->reader_lock)) 4486289a5a25SSteven Rostedt (Red Hat) return true; 44878d707e8eSSteven Rostedt 4488289a5a25SSteven Rostedt (Red Hat) /* Continue without locking, but disable the ring buffer */ 4489289a5a25SSteven Rostedt (Red Hat) atomic_inc(&cpu_buffer->record_disabled); 4490289a5a25SSteven Rostedt (Red Hat) return false; 4491289a5a25SSteven Rostedt (Red Hat) } 4492289a5a25SSteven Rostedt (Red Hat) 4493289a5a25SSteven Rostedt (Red Hat) static inline void 4494289a5a25SSteven Rostedt (Red Hat) rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) 4495289a5a25SSteven Rostedt (Red Hat) { 4496289a5a25SSteven Rostedt (Red Hat) if (likely(locked)) 4497289a5a25SSteven Rostedt (Red Hat) raw_spin_unlock(&cpu_buffer->reader_lock); 4498289a5a25SSteven Rostedt (Red Hat) return; 44998d707e8eSSteven Rostedt } 45008d707e8eSSteven Rostedt 45017a8e76a3SSteven Rostedt /** 4502f83c9d0fSSteven Rostedt * ring_buffer_peek - peek at the next event to be read 4503f83c9d0fSSteven Rostedt * @buffer: The ring buffer to read 4504f83c9d0fSSteven Rostedt * @cpu: The cpu to peak at 4505f83c9d0fSSteven Rostedt * @ts: The timestamp counter of this event. 450666a8cb95SSteven Rostedt * @lost_events: a variable to store if events were lost (may be NULL) 4507f83c9d0fSSteven Rostedt * 4508f83c9d0fSSteven Rostedt * This will return the event that will be read next, but does 4509f83c9d0fSSteven Rostedt * not consume the data. 4510f83c9d0fSSteven Rostedt */ 4511f83c9d0fSSteven Rostedt struct ring_buffer_event * 451213292494SSteven Rostedt (VMware) ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, 451366a8cb95SSteven Rostedt unsigned long *lost_events) 4514f83c9d0fSSteven Rostedt { 4515f83c9d0fSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 45168aabee57SSteven Rostedt struct ring_buffer_event *event; 4517f83c9d0fSSteven Rostedt unsigned long flags; 4518289a5a25SSteven Rostedt (Red Hat) bool dolock; 4519f83c9d0fSSteven Rostedt 4520554f786eSSteven Rostedt if (!cpumask_test_cpu(cpu, buffer->cpumask)) 45218aabee57SSteven Rostedt return NULL; 4522554f786eSSteven Rostedt 45232d622719STom Zanussi again: 45248d707e8eSSteven Rostedt local_irq_save(flags); 4525289a5a25SSteven Rostedt (Red Hat) dolock = rb_reader_lock(cpu_buffer); 452666a8cb95SSteven Rostedt event = rb_buffer_peek(cpu_buffer, ts, lost_events); 4527469535a5SRobert Richter if (event && event->type_len == RINGBUF_TYPE_PADDING) 4528469535a5SRobert Richter rb_advance_reader(cpu_buffer); 4529289a5a25SSteven Rostedt (Red Hat) rb_reader_unlock(cpu_buffer, dolock); 45308d707e8eSSteven Rostedt local_irq_restore(flags); 4531f83c9d0fSSteven Rostedt 45321b959e18SSteven Rostedt if (event && event->type_len == RINGBUF_TYPE_PADDING) 45332d622719STom Zanussi goto again; 45342d622719STom Zanussi 4535f83c9d0fSSteven Rostedt return event; 4536f83c9d0fSSteven Rostedt } 4537f83c9d0fSSteven Rostedt 4538c9b7a4a7SSteven Rostedt (VMware) /** ring_buffer_iter_dropped - report if there are dropped events 4539c9b7a4a7SSteven Rostedt (VMware) * @iter: The ring buffer iterator 4540c9b7a4a7SSteven Rostedt (VMware) * 4541c9b7a4a7SSteven Rostedt (VMware) * Returns true if there was dropped events since the last peek. 4542c9b7a4a7SSteven Rostedt (VMware) */ 4543c9b7a4a7SSteven Rostedt (VMware) bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter) 4544c9b7a4a7SSteven Rostedt (VMware) { 4545c9b7a4a7SSteven Rostedt (VMware) bool ret = iter->missed_events != 0; 4546c9b7a4a7SSteven Rostedt (VMware) 4547c9b7a4a7SSteven Rostedt (VMware) iter->missed_events = 0; 4548c9b7a4a7SSteven Rostedt (VMware) return ret; 4549c9b7a4a7SSteven Rostedt (VMware) } 4550c9b7a4a7SSteven Rostedt (VMware) EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped); 4551c9b7a4a7SSteven Rostedt (VMware) 4552f83c9d0fSSteven Rostedt /** 4553f83c9d0fSSteven Rostedt * ring_buffer_iter_peek - peek at the next event to be read 4554f83c9d0fSSteven Rostedt * @iter: The ring buffer iterator 4555f83c9d0fSSteven Rostedt * @ts: The timestamp counter of this event. 4556f83c9d0fSSteven Rostedt * 4557f83c9d0fSSteven Rostedt * This will return the event that will be read next, but does 4558f83c9d0fSSteven Rostedt * not increment the iterator. 4559f83c9d0fSSteven Rostedt */ 4560f83c9d0fSSteven Rostedt struct ring_buffer_event * 4561f83c9d0fSSteven Rostedt ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) 4562f83c9d0fSSteven Rostedt { 4563f83c9d0fSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 4564f83c9d0fSSteven Rostedt struct ring_buffer_event *event; 4565f83c9d0fSSteven Rostedt unsigned long flags; 4566f83c9d0fSSteven Rostedt 45672d622719STom Zanussi again: 45685389f6faSThomas Gleixner raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 4569f83c9d0fSSteven Rostedt event = rb_iter_peek(iter, ts); 45705389f6faSThomas Gleixner raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 4571f83c9d0fSSteven Rostedt 45721b959e18SSteven Rostedt if (event && event->type_len == RINGBUF_TYPE_PADDING) 45732d622719STom Zanussi goto again; 45742d622719STom Zanussi 4575f83c9d0fSSteven Rostedt return event; 4576f83c9d0fSSteven Rostedt } 4577f83c9d0fSSteven Rostedt 4578f83c9d0fSSteven Rostedt /** 45797a8e76a3SSteven Rostedt * ring_buffer_consume - return an event and consume it 45807a8e76a3SSteven Rostedt * @buffer: The ring buffer to get the next event from 458166a8cb95SSteven Rostedt * @cpu: the cpu to read the buffer from 458266a8cb95SSteven Rostedt * @ts: a variable to store the timestamp (may be NULL) 458366a8cb95SSteven Rostedt * @lost_events: a variable to store if events were lost (may be NULL) 45847a8e76a3SSteven Rostedt * 45857a8e76a3SSteven Rostedt * Returns the next event in the ring buffer, and that event is consumed. 45867a8e76a3SSteven Rostedt * Meaning, that sequential reads will keep returning a different event, 45877a8e76a3SSteven Rostedt * and eventually empty the ring buffer if the producer is slower. 45887a8e76a3SSteven Rostedt */ 45897a8e76a3SSteven Rostedt struct ring_buffer_event * 459013292494SSteven Rostedt (VMware) ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, 459166a8cb95SSteven Rostedt unsigned long *lost_events) 45927a8e76a3SSteven Rostedt { 4593554f786eSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 4594554f786eSSteven Rostedt struct ring_buffer_event *event = NULL; 4595f83c9d0fSSteven Rostedt unsigned long flags; 4596289a5a25SSteven Rostedt (Red Hat) bool dolock; 45977a8e76a3SSteven Rostedt 45982d622719STom Zanussi again: 4599554f786eSSteven Rostedt /* might be called in atomic */ 4600554f786eSSteven Rostedt preempt_disable(); 46017a8e76a3SSteven Rostedt 4602554f786eSSteven Rostedt if (!cpumask_test_cpu(cpu, buffer->cpumask)) 4603554f786eSSteven Rostedt goto out; 4604554f786eSSteven Rostedt 4605554f786eSSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 46068d707e8eSSteven Rostedt local_irq_save(flags); 4607289a5a25SSteven Rostedt (Red Hat) dolock = rb_reader_lock(cpu_buffer); 46087a8e76a3SSteven Rostedt 460966a8cb95SSteven Rostedt event = rb_buffer_peek(cpu_buffer, ts, lost_events); 461066a8cb95SSteven Rostedt if (event) { 461166a8cb95SSteven Rostedt cpu_buffer->lost_events = 0; 4612d769041fSSteven Rostedt rb_advance_reader(cpu_buffer); 461366a8cb95SSteven Rostedt } 46147a8e76a3SSteven Rostedt 4615289a5a25SSteven Rostedt (Red Hat) rb_reader_unlock(cpu_buffer, dolock); 46168d707e8eSSteven Rostedt local_irq_restore(flags); 4617f83c9d0fSSteven Rostedt 4618554f786eSSteven Rostedt out: 4619554f786eSSteven Rostedt preempt_enable(); 4620554f786eSSteven Rostedt 46211b959e18SSteven Rostedt if (event && event->type_len == RINGBUF_TYPE_PADDING) 46222d622719STom Zanussi goto again; 46232d622719STom Zanussi 46247a8e76a3SSteven Rostedt return event; 46257a8e76a3SSteven Rostedt } 4626c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_consume); 46277a8e76a3SSteven Rostedt 46287a8e76a3SSteven Rostedt /** 462972c9ddfdSDavid Miller * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer 46307a8e76a3SSteven Rostedt * @buffer: The ring buffer to read from 46317a8e76a3SSteven Rostedt * @cpu: The cpu buffer to iterate over 463231b265b3SDouglas Anderson * @flags: gfp flags to use for memory allocation 46337a8e76a3SSteven Rostedt * 463472c9ddfdSDavid Miller * This performs the initial preparations necessary to iterate 463572c9ddfdSDavid Miller * through the buffer. Memory is allocated, buffer recording 463672c9ddfdSDavid Miller * is disabled, and the iterator pointer is returned to the caller. 46377a8e76a3SSteven Rostedt * 46386167c205SSteven Rostedt (VMware) * Disabling buffer recording prevents the reading from being 463972c9ddfdSDavid Miller * corrupted. This is not a consuming read, so a producer is not 464072c9ddfdSDavid Miller * expected. 464172c9ddfdSDavid Miller * 464272c9ddfdSDavid Miller * After a sequence of ring_buffer_read_prepare calls, the user is 4643d611851bSzhangwei(Jovi) * expected to make at least one call to ring_buffer_read_prepare_sync. 464472c9ddfdSDavid Miller * Afterwards, ring_buffer_read_start is invoked to get things going 464572c9ddfdSDavid Miller * for real. 464672c9ddfdSDavid Miller * 4647d611851bSzhangwei(Jovi) * This overall must be paired with ring_buffer_read_finish. 46487a8e76a3SSteven Rostedt */ 46497a8e76a3SSteven Rostedt struct ring_buffer_iter * 465013292494SSteven Rostedt (VMware) ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) 46517a8e76a3SSteven Rostedt { 46527a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 46538aabee57SSteven Rostedt struct ring_buffer_iter *iter; 46547a8e76a3SSteven Rostedt 46559e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer->cpumask)) 46568aabee57SSteven Rostedt return NULL; 46577a8e76a3SSteven Rostedt 4658785888c5SSteven Rostedt (VMware) iter = kzalloc(sizeof(*iter), flags); 46597a8e76a3SSteven Rostedt if (!iter) 46608aabee57SSteven Rostedt return NULL; 46617a8e76a3SSteven Rostedt 4662785888c5SSteven Rostedt (VMware) iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags); 4663785888c5SSteven Rostedt (VMware) if (!iter->event) { 4664785888c5SSteven Rostedt (VMware) kfree(iter); 4665785888c5SSteven Rostedt (VMware) return NULL; 4666785888c5SSteven Rostedt (VMware) } 4667785888c5SSteven Rostedt (VMware) 46687a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 46697a8e76a3SSteven Rostedt 46707a8e76a3SSteven Rostedt iter->cpu_buffer = cpu_buffer; 46717a8e76a3SSteven Rostedt 467207b8b10eSSteven Rostedt (VMware) atomic_inc(&cpu_buffer->resize_disabled); 467372c9ddfdSDavid Miller 467472c9ddfdSDavid Miller return iter; 467572c9ddfdSDavid Miller } 467672c9ddfdSDavid Miller EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); 467772c9ddfdSDavid Miller 467872c9ddfdSDavid Miller /** 467972c9ddfdSDavid Miller * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls 468072c9ddfdSDavid Miller * 468172c9ddfdSDavid Miller * All previously invoked ring_buffer_read_prepare calls to prepare 468272c9ddfdSDavid Miller * iterators will be synchronized. Afterwards, read_buffer_read_start 468372c9ddfdSDavid Miller * calls on those iterators are allowed. 468472c9ddfdSDavid Miller */ 468572c9ddfdSDavid Miller void 468672c9ddfdSDavid Miller ring_buffer_read_prepare_sync(void) 468772c9ddfdSDavid Miller { 468874401729SPaul E. McKenney synchronize_rcu(); 468972c9ddfdSDavid Miller } 469072c9ddfdSDavid Miller EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); 469172c9ddfdSDavid Miller 469272c9ddfdSDavid Miller /** 469372c9ddfdSDavid Miller * ring_buffer_read_start - start a non consuming read of the buffer 469472c9ddfdSDavid Miller * @iter: The iterator returned by ring_buffer_read_prepare 469572c9ddfdSDavid Miller * 469672c9ddfdSDavid Miller * This finalizes the startup of an iteration through the buffer. 469772c9ddfdSDavid Miller * The iterator comes from a call to ring_buffer_read_prepare and 469872c9ddfdSDavid Miller * an intervening ring_buffer_read_prepare_sync must have been 469972c9ddfdSDavid Miller * performed. 470072c9ddfdSDavid Miller * 4701d611851bSzhangwei(Jovi) * Must be paired with ring_buffer_read_finish. 470272c9ddfdSDavid Miller */ 470372c9ddfdSDavid Miller void 470472c9ddfdSDavid Miller ring_buffer_read_start(struct ring_buffer_iter *iter) 470572c9ddfdSDavid Miller { 470672c9ddfdSDavid Miller struct ring_buffer_per_cpu *cpu_buffer; 470772c9ddfdSDavid Miller unsigned long flags; 470872c9ddfdSDavid Miller 470972c9ddfdSDavid Miller if (!iter) 471072c9ddfdSDavid Miller return; 471172c9ddfdSDavid Miller 471272c9ddfdSDavid Miller cpu_buffer = iter->cpu_buffer; 47137a8e76a3SSteven Rostedt 47145389f6faSThomas Gleixner raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 47150199c4e6SThomas Gleixner arch_spin_lock(&cpu_buffer->lock); 4716642edba5SSteven Rostedt rb_iter_reset(iter); 47170199c4e6SThomas Gleixner arch_spin_unlock(&cpu_buffer->lock); 47185389f6faSThomas Gleixner raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 47197a8e76a3SSteven Rostedt } 4720c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read_start); 47217a8e76a3SSteven Rostedt 47227a8e76a3SSteven Rostedt /** 4723d611851bSzhangwei(Jovi) * ring_buffer_read_finish - finish reading the iterator of the buffer 47247a8e76a3SSteven Rostedt * @iter: The iterator retrieved by ring_buffer_start 47257a8e76a3SSteven Rostedt * 47267a8e76a3SSteven Rostedt * This re-enables the recording to the buffer, and frees the 47277a8e76a3SSteven Rostedt * iterator. 47287a8e76a3SSteven Rostedt */ 47297a8e76a3SSteven Rostedt void 47307a8e76a3SSteven Rostedt ring_buffer_read_finish(struct ring_buffer_iter *iter) 47317a8e76a3SSteven Rostedt { 47327a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 47339366c1baSSteven Rostedt unsigned long flags; 47347a8e76a3SSteven Rostedt 4735659f451fSSteven Rostedt /* 4736659f451fSSteven Rostedt * Ring buffer is disabled from recording, here's a good place 4737659f451fSSteven Rostedt * to check the integrity of the ring buffer. 47389366c1baSSteven Rostedt * Must prevent readers from trying to read, as the check 47399366c1baSSteven Rostedt * clears the HEAD page and readers require it. 4740659f451fSSteven Rostedt */ 47419366c1baSSteven Rostedt raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 4742659f451fSSteven Rostedt rb_check_pages(cpu_buffer); 47439366c1baSSteven Rostedt raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 4744659f451fSSteven Rostedt 474507b8b10eSSteven Rostedt (VMware) atomic_dec(&cpu_buffer->resize_disabled); 4746785888c5SSteven Rostedt (VMware) kfree(iter->event); 47477a8e76a3SSteven Rostedt kfree(iter); 47487a8e76a3SSteven Rostedt } 4749c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read_finish); 47507a8e76a3SSteven Rostedt 47517a8e76a3SSteven Rostedt /** 4752bc1a72afSSteven Rostedt (VMware) * ring_buffer_iter_advance - advance the iterator to the next location 47537a8e76a3SSteven Rostedt * @iter: The ring buffer iterator 47547a8e76a3SSteven Rostedt * 4755bc1a72afSSteven Rostedt (VMware) * Move the location of the iterator such that the next read will 4756bc1a72afSSteven Rostedt (VMware) * be the next location of the iterator. 47577a8e76a3SSteven Rostedt */ 4758bc1a72afSSteven Rostedt (VMware) void ring_buffer_iter_advance(struct ring_buffer_iter *iter) 47597a8e76a3SSteven Rostedt { 4760f83c9d0fSSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 4761f83c9d0fSSteven Rostedt unsigned long flags; 47627a8e76a3SSteven Rostedt 47635389f6faSThomas Gleixner raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 47647e9391cfSSteven Rostedt 47657a8e76a3SSteven Rostedt rb_advance_iter(iter); 47667a8e76a3SSteven Rostedt 4767bc1a72afSSteven Rostedt (VMware) raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 47687a8e76a3SSteven Rostedt } 4769bc1a72afSSteven Rostedt (VMware) EXPORT_SYMBOL_GPL(ring_buffer_iter_advance); 47707a8e76a3SSteven Rostedt 47717a8e76a3SSteven Rostedt /** 47727a8e76a3SSteven Rostedt * ring_buffer_size - return the size of the ring buffer (in bytes) 47737a8e76a3SSteven Rostedt * @buffer: The ring buffer. 477459e7cffeSFabian Frederick * @cpu: The CPU to get ring buffer size from. 47757a8e76a3SSteven Rostedt */ 477613292494SSteven Rostedt (VMware) unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) 47777a8e76a3SSteven Rostedt { 4778438ced17SVaibhav Nagarnaik /* 4779438ced17SVaibhav Nagarnaik * Earlier, this method returned 4780438ced17SVaibhav Nagarnaik * BUF_PAGE_SIZE * buffer->nr_pages 4781438ced17SVaibhav Nagarnaik * Since the nr_pages field is now removed, we have converted this to 4782438ced17SVaibhav Nagarnaik * return the per cpu buffer value. 4783438ced17SVaibhav Nagarnaik */ 4784438ced17SVaibhav Nagarnaik if (!cpumask_test_cpu(cpu, buffer->cpumask)) 4785438ced17SVaibhav Nagarnaik return 0; 4786438ced17SVaibhav Nagarnaik 4787438ced17SVaibhav Nagarnaik return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages; 47887a8e76a3SSteven Rostedt } 4789c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_size); 47907a8e76a3SSteven Rostedt 47917a8e76a3SSteven Rostedt static void 47927a8e76a3SSteven Rostedt rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 47937a8e76a3SSteven Rostedt { 479477ae365eSSteven Rostedt rb_head_page_deactivate(cpu_buffer); 479577ae365eSSteven Rostedt 47967a8e76a3SSteven Rostedt cpu_buffer->head_page 47973adc54faSSteven Rostedt = list_entry(cpu_buffer->pages, struct buffer_page, list); 4798bf41a158SSteven Rostedt local_set(&cpu_buffer->head_page->write, 0); 4799778c55d4SSteven Rostedt local_set(&cpu_buffer->head_page->entries, 0); 4800abc9b56dSSteven Rostedt local_set(&cpu_buffer->head_page->page->commit, 0); 48017a8e76a3SSteven Rostedt 48026f807acdSSteven Rostedt cpu_buffer->head_page->read = 0; 4803bf41a158SSteven Rostedt 4804bf41a158SSteven Rostedt cpu_buffer->tail_page = cpu_buffer->head_page; 4805bf41a158SSteven Rostedt cpu_buffer->commit_page = cpu_buffer->head_page; 4806bf41a158SSteven Rostedt 4807bf41a158SSteven Rostedt INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 48085040b4b7SVaibhav Nagarnaik INIT_LIST_HEAD(&cpu_buffer->new_pages); 4809bf41a158SSteven Rostedt local_set(&cpu_buffer->reader_page->write, 0); 4810778c55d4SSteven Rostedt local_set(&cpu_buffer->reader_page->entries, 0); 4811abc9b56dSSteven Rostedt local_set(&cpu_buffer->reader_page->page->commit, 0); 48126f807acdSSteven Rostedt cpu_buffer->reader_page->read = 0; 4813d769041fSSteven Rostedt 4814c64e148aSVaibhav Nagarnaik local_set(&cpu_buffer->entries_bytes, 0); 481577ae365eSSteven Rostedt local_set(&cpu_buffer->overrun, 0); 4816884bfe89SSlava Pestov local_set(&cpu_buffer->commit_overrun, 0); 4817884bfe89SSlava Pestov local_set(&cpu_buffer->dropped_events, 0); 4818e4906effSSteven Rostedt local_set(&cpu_buffer->entries, 0); 4819fa743953SSteven Rostedt local_set(&cpu_buffer->committing, 0); 4820fa743953SSteven Rostedt local_set(&cpu_buffer->commits, 0); 48212c2b0a78SSteven Rostedt (VMware) local_set(&cpu_buffer->pages_touched, 0); 48222c2b0a78SSteven Rostedt (VMware) local_set(&cpu_buffer->pages_read, 0); 482303329f99SSteven Rostedt (VMware) cpu_buffer->last_pages_touch = 0; 48242c2b0a78SSteven Rostedt (VMware) cpu_buffer->shortest_full = 0; 482577ae365eSSteven Rostedt cpu_buffer->read = 0; 4826c64e148aSVaibhav Nagarnaik cpu_buffer->read_bytes = 0; 482769507c06SSteven Rostedt 482810464b4aSSteven Rostedt (VMware) rb_time_set(&cpu_buffer->write_stamp, 0); 482910464b4aSSteven Rostedt (VMware) rb_time_set(&cpu_buffer->before_stamp, 0); 483077ae365eSSteven Rostedt 483166a8cb95SSteven Rostedt cpu_buffer->lost_events = 0; 483266a8cb95SSteven Rostedt cpu_buffer->last_overrun = 0; 483366a8cb95SSteven Rostedt 483477ae365eSSteven Rostedt rb_head_page_activate(cpu_buffer); 48357a8e76a3SSteven Rostedt } 48367a8e76a3SSteven Rostedt 4837b23d7a5fSNicholas Piggin /* Must have disabled the cpu buffer then done a synchronize_rcu */ 4838b23d7a5fSNicholas Piggin static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 48397a8e76a3SSteven Rostedt { 48407a8e76a3SSteven Rostedt unsigned long flags; 48417a8e76a3SSteven Rostedt 48425389f6faSThomas Gleixner raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 4843f83c9d0fSSteven Rostedt 484441b6a95dSSteven Rostedt if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 484541b6a95dSSteven Rostedt goto out; 484641b6a95dSSteven Rostedt 48470199c4e6SThomas Gleixner arch_spin_lock(&cpu_buffer->lock); 48487a8e76a3SSteven Rostedt 48497a8e76a3SSteven Rostedt rb_reset_cpu(cpu_buffer); 48507a8e76a3SSteven Rostedt 48510199c4e6SThomas Gleixner arch_spin_unlock(&cpu_buffer->lock); 4852f83c9d0fSSteven Rostedt 485341b6a95dSSteven Rostedt out: 48545389f6faSThomas Gleixner raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 4855b23d7a5fSNicholas Piggin } 4856b23d7a5fSNicholas Piggin 4857b23d7a5fSNicholas Piggin /** 4858b23d7a5fSNicholas Piggin * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer 4859b23d7a5fSNicholas Piggin * @buffer: The ring buffer to reset a per cpu buffer of 4860b23d7a5fSNicholas Piggin * @cpu: The CPU buffer to be reset 4861b23d7a5fSNicholas Piggin */ 4862b23d7a5fSNicholas Piggin void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) 4863b23d7a5fSNicholas Piggin { 4864b23d7a5fSNicholas Piggin struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 4865b23d7a5fSNicholas Piggin 4866b23d7a5fSNicholas Piggin if (!cpumask_test_cpu(cpu, buffer->cpumask)) 4867b23d7a5fSNicholas Piggin return; 4868b23d7a5fSNicholas Piggin 4869bbeb9746SGaurav Kohli /* prevent another thread from changing buffer sizes */ 4870bbeb9746SGaurav Kohli mutex_lock(&buffer->mutex); 4871bbeb9746SGaurav Kohli 4872b23d7a5fSNicholas Piggin atomic_inc(&cpu_buffer->resize_disabled); 4873b23d7a5fSNicholas Piggin atomic_inc(&cpu_buffer->record_disabled); 4874b23d7a5fSNicholas Piggin 4875b23d7a5fSNicholas Piggin /* Make sure all commits have finished */ 4876b23d7a5fSNicholas Piggin synchronize_rcu(); 4877b23d7a5fSNicholas Piggin 4878b23d7a5fSNicholas Piggin reset_disabled_cpu_buffer(cpu_buffer); 487941ede23eSSteven Rostedt 488041ede23eSSteven Rostedt atomic_dec(&cpu_buffer->record_disabled); 488107b8b10eSSteven Rostedt (VMware) atomic_dec(&cpu_buffer->resize_disabled); 4882bbeb9746SGaurav Kohli 4883bbeb9746SGaurav Kohli mutex_unlock(&buffer->mutex); 48847a8e76a3SSteven Rostedt } 4885c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 48867a8e76a3SSteven Rostedt 48877a8e76a3SSteven Rostedt /** 4888b23d7a5fSNicholas Piggin * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer 4889b23d7a5fSNicholas Piggin * @buffer: The ring buffer to reset a per cpu buffer of 4890b23d7a5fSNicholas Piggin * @cpu: The CPU buffer to be reset 4891b23d7a5fSNicholas Piggin */ 4892b23d7a5fSNicholas Piggin void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) 4893b23d7a5fSNicholas Piggin { 4894b23d7a5fSNicholas Piggin struct ring_buffer_per_cpu *cpu_buffer; 4895b23d7a5fSNicholas Piggin int cpu; 4896b23d7a5fSNicholas Piggin 4897bbeb9746SGaurav Kohli /* prevent another thread from changing buffer sizes */ 4898bbeb9746SGaurav Kohli mutex_lock(&buffer->mutex); 4899bbeb9746SGaurav Kohli 4900b23d7a5fSNicholas Piggin for_each_online_buffer_cpu(buffer, cpu) { 4901b23d7a5fSNicholas Piggin cpu_buffer = buffer->buffers[cpu]; 4902b23d7a5fSNicholas Piggin 4903b23d7a5fSNicholas Piggin atomic_inc(&cpu_buffer->resize_disabled); 4904b23d7a5fSNicholas Piggin atomic_inc(&cpu_buffer->record_disabled); 4905b23d7a5fSNicholas Piggin } 4906b23d7a5fSNicholas Piggin 4907b23d7a5fSNicholas Piggin /* Make sure all commits have finished */ 4908b23d7a5fSNicholas Piggin synchronize_rcu(); 4909b23d7a5fSNicholas Piggin 4910b23d7a5fSNicholas Piggin for_each_online_buffer_cpu(buffer, cpu) { 4911b23d7a5fSNicholas Piggin cpu_buffer = buffer->buffers[cpu]; 4912b23d7a5fSNicholas Piggin 4913b23d7a5fSNicholas Piggin reset_disabled_cpu_buffer(cpu_buffer); 4914b23d7a5fSNicholas Piggin 4915b23d7a5fSNicholas Piggin atomic_dec(&cpu_buffer->record_disabled); 4916b23d7a5fSNicholas Piggin atomic_dec(&cpu_buffer->resize_disabled); 4917b23d7a5fSNicholas Piggin } 4918bbeb9746SGaurav Kohli 4919bbeb9746SGaurav Kohli mutex_unlock(&buffer->mutex); 4920b23d7a5fSNicholas Piggin } 4921b23d7a5fSNicholas Piggin 4922b23d7a5fSNicholas Piggin /** 49237a8e76a3SSteven Rostedt * ring_buffer_reset - reset a ring buffer 49247a8e76a3SSteven Rostedt * @buffer: The ring buffer to reset all cpu buffers 49257a8e76a3SSteven Rostedt */ 492613292494SSteven Rostedt (VMware) void ring_buffer_reset(struct trace_buffer *buffer) 49277a8e76a3SSteven Rostedt { 4928b23d7a5fSNicholas Piggin struct ring_buffer_per_cpu *cpu_buffer; 49297a8e76a3SSteven Rostedt int cpu; 49307a8e76a3SSteven Rostedt 4931b23d7a5fSNicholas Piggin for_each_buffer_cpu(buffer, cpu) { 4932b23d7a5fSNicholas Piggin cpu_buffer = buffer->buffers[cpu]; 4933b23d7a5fSNicholas Piggin 4934b23d7a5fSNicholas Piggin atomic_inc(&cpu_buffer->resize_disabled); 4935b23d7a5fSNicholas Piggin atomic_inc(&cpu_buffer->record_disabled); 4936b23d7a5fSNicholas Piggin } 4937b23d7a5fSNicholas Piggin 4938b23d7a5fSNicholas Piggin /* Make sure all commits have finished */ 4939b23d7a5fSNicholas Piggin synchronize_rcu(); 4940b23d7a5fSNicholas Piggin 4941b23d7a5fSNicholas Piggin for_each_buffer_cpu(buffer, cpu) { 4942b23d7a5fSNicholas Piggin cpu_buffer = buffer->buffers[cpu]; 4943b23d7a5fSNicholas Piggin 4944b23d7a5fSNicholas Piggin reset_disabled_cpu_buffer(cpu_buffer); 4945b23d7a5fSNicholas Piggin 4946b23d7a5fSNicholas Piggin atomic_dec(&cpu_buffer->record_disabled); 4947b23d7a5fSNicholas Piggin atomic_dec(&cpu_buffer->resize_disabled); 4948b23d7a5fSNicholas Piggin } 49497a8e76a3SSteven Rostedt } 4950c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_reset); 49517a8e76a3SSteven Rostedt 49527a8e76a3SSteven Rostedt /** 49537a8e76a3SSteven Rostedt * rind_buffer_empty - is the ring buffer empty? 49547a8e76a3SSteven Rostedt * @buffer: The ring buffer to test 49557a8e76a3SSteven Rostedt */ 495613292494SSteven Rostedt (VMware) bool ring_buffer_empty(struct trace_buffer *buffer) 49577a8e76a3SSteven Rostedt { 49587a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 4959d4788207SSteven Rostedt unsigned long flags; 4960289a5a25SSteven Rostedt (Red Hat) bool dolock; 49617a8e76a3SSteven Rostedt int cpu; 4962d4788207SSteven Rostedt int ret; 49637a8e76a3SSteven Rostedt 49647a8e76a3SSteven Rostedt /* yes this is racy, but if you don't like the race, lock the buffer */ 49657a8e76a3SSteven Rostedt for_each_buffer_cpu(buffer, cpu) { 49667a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 49678d707e8eSSteven Rostedt local_irq_save(flags); 4968289a5a25SSteven Rostedt (Red Hat) dolock = rb_reader_lock(cpu_buffer); 4969d4788207SSteven Rostedt ret = rb_per_cpu_empty(cpu_buffer); 4970289a5a25SSteven Rostedt (Red Hat) rb_reader_unlock(cpu_buffer, dolock); 49718d707e8eSSteven Rostedt local_irq_restore(flags); 49728d707e8eSSteven Rostedt 4973d4788207SSteven Rostedt if (!ret) 49743d4e204dSYaowei Bai return false; 49757a8e76a3SSteven Rostedt } 4976554f786eSSteven Rostedt 49773d4e204dSYaowei Bai return true; 49787a8e76a3SSteven Rostedt } 4979c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_empty); 49807a8e76a3SSteven Rostedt 49817a8e76a3SSteven Rostedt /** 49827a8e76a3SSteven Rostedt * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? 49837a8e76a3SSteven Rostedt * @buffer: The ring buffer 49847a8e76a3SSteven Rostedt * @cpu: The CPU buffer to test 49857a8e76a3SSteven Rostedt */ 498613292494SSteven Rostedt (VMware) bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu) 49877a8e76a3SSteven Rostedt { 49887a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer; 4989d4788207SSteven Rostedt unsigned long flags; 4990289a5a25SSteven Rostedt (Red Hat) bool dolock; 49918aabee57SSteven Rostedt int ret; 49927a8e76a3SSteven Rostedt 49939e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer->cpumask)) 49943d4e204dSYaowei Bai return true; 49957a8e76a3SSteven Rostedt 49967a8e76a3SSteven Rostedt cpu_buffer = buffer->buffers[cpu]; 49978d707e8eSSteven Rostedt local_irq_save(flags); 4998289a5a25SSteven Rostedt (Red Hat) dolock = rb_reader_lock(cpu_buffer); 4999554f786eSSteven Rostedt ret = rb_per_cpu_empty(cpu_buffer); 5000289a5a25SSteven Rostedt (Red Hat) rb_reader_unlock(cpu_buffer, dolock); 50018d707e8eSSteven Rostedt local_irq_restore(flags); 5002554f786eSSteven Rostedt 5003554f786eSSteven Rostedt return ret; 50047a8e76a3SSteven Rostedt } 5005c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 50067a8e76a3SSteven Rostedt 500785bac32cSSteven Rostedt #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 50087a8e76a3SSteven Rostedt /** 50097a8e76a3SSteven Rostedt * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers 50107a8e76a3SSteven Rostedt * @buffer_a: One buffer to swap with 50117a8e76a3SSteven Rostedt * @buffer_b: The other buffer to swap with 501259e7cffeSFabian Frederick * @cpu: the CPU of the buffers to swap 50137a8e76a3SSteven Rostedt * 50147a8e76a3SSteven Rostedt * This function is useful for tracers that want to take a "snapshot" 50157a8e76a3SSteven Rostedt * of a CPU buffer and has another back up buffer lying around. 50167a8e76a3SSteven Rostedt * it is expected that the tracer handles the cpu buffer not being 50177a8e76a3SSteven Rostedt * used at the moment. 50187a8e76a3SSteven Rostedt */ 501913292494SSteven Rostedt (VMware) int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, 502013292494SSteven Rostedt (VMware) struct trace_buffer *buffer_b, int cpu) 50217a8e76a3SSteven Rostedt { 50227a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer_a; 50237a8e76a3SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer_b; 5024554f786eSSteven Rostedt int ret = -EINVAL; 5025554f786eSSteven Rostedt 50269e01c1b7SRusty Russell if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || 50279e01c1b7SRusty Russell !cpumask_test_cpu(cpu, buffer_b->cpumask)) 5028554f786eSSteven Rostedt goto out; 50297a8e76a3SSteven Rostedt 5030438ced17SVaibhav Nagarnaik cpu_buffer_a = buffer_a->buffers[cpu]; 5031438ced17SVaibhav Nagarnaik cpu_buffer_b = buffer_b->buffers[cpu]; 5032438ced17SVaibhav Nagarnaik 50337a8e76a3SSteven Rostedt /* At least make sure the two buffers are somewhat the same */ 5034438ced17SVaibhav Nagarnaik if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) 5035554f786eSSteven Rostedt goto out; 5036554f786eSSteven Rostedt 5037554f786eSSteven Rostedt ret = -EAGAIN; 50387a8e76a3SSteven Rostedt 503997b17efeSSteven Rostedt if (atomic_read(&buffer_a->record_disabled)) 5040554f786eSSteven Rostedt goto out; 504197b17efeSSteven Rostedt 504297b17efeSSteven Rostedt if (atomic_read(&buffer_b->record_disabled)) 5043554f786eSSteven Rostedt goto out; 504497b17efeSSteven Rostedt 504597b17efeSSteven Rostedt if (atomic_read(&cpu_buffer_a->record_disabled)) 5046554f786eSSteven Rostedt goto out; 504797b17efeSSteven Rostedt 504897b17efeSSteven Rostedt if (atomic_read(&cpu_buffer_b->record_disabled)) 5049554f786eSSteven Rostedt goto out; 505097b17efeSSteven Rostedt 50517a8e76a3SSteven Rostedt /* 505274401729SPaul E. McKenney * We can't do a synchronize_rcu here because this 50537a8e76a3SSteven Rostedt * function can be called in atomic context. 50547a8e76a3SSteven Rostedt * Normally this will be called from the same CPU as cpu. 50557a8e76a3SSteven Rostedt * If not it's up to the caller to protect this. 50567a8e76a3SSteven Rostedt */ 50577a8e76a3SSteven Rostedt atomic_inc(&cpu_buffer_a->record_disabled); 50587a8e76a3SSteven Rostedt atomic_inc(&cpu_buffer_b->record_disabled); 50597a8e76a3SSteven Rostedt 506098277991SSteven Rostedt ret = -EBUSY; 506198277991SSteven Rostedt if (local_read(&cpu_buffer_a->committing)) 506298277991SSteven Rostedt goto out_dec; 506398277991SSteven Rostedt if (local_read(&cpu_buffer_b->committing)) 506498277991SSteven Rostedt goto out_dec; 506598277991SSteven Rostedt 50667a8e76a3SSteven Rostedt buffer_a->buffers[cpu] = cpu_buffer_b; 50677a8e76a3SSteven Rostedt buffer_b->buffers[cpu] = cpu_buffer_a; 50687a8e76a3SSteven Rostedt 50697a8e76a3SSteven Rostedt cpu_buffer_b->buffer = buffer_a; 50707a8e76a3SSteven Rostedt cpu_buffer_a->buffer = buffer_b; 50717a8e76a3SSteven Rostedt 507298277991SSteven Rostedt ret = 0; 507398277991SSteven Rostedt 507498277991SSteven Rostedt out_dec: 50757a8e76a3SSteven Rostedt atomic_dec(&cpu_buffer_a->record_disabled); 50767a8e76a3SSteven Rostedt atomic_dec(&cpu_buffer_b->record_disabled); 5077554f786eSSteven Rostedt out: 5078554f786eSSteven Rostedt return ret; 50797a8e76a3SSteven Rostedt } 5080c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 508185bac32cSSteven Rostedt #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ 50827a8e76a3SSteven Rostedt 50838789a9e7SSteven Rostedt /** 50848789a9e7SSteven Rostedt * ring_buffer_alloc_read_page - allocate a page to read from buffer 50858789a9e7SSteven Rostedt * @buffer: the buffer to allocate for. 5086d611851bSzhangwei(Jovi) * @cpu: the cpu buffer to allocate. 50878789a9e7SSteven Rostedt * 50888789a9e7SSteven Rostedt * This function is used in conjunction with ring_buffer_read_page. 50898789a9e7SSteven Rostedt * When reading a full page from the ring buffer, these functions 50908789a9e7SSteven Rostedt * can be used to speed up the process. The calling function should 50918789a9e7SSteven Rostedt * allocate a few pages first with this function. Then when it 50928789a9e7SSteven Rostedt * needs to get pages from the ring buffer, it passes the result 50938789a9e7SSteven Rostedt * of this function into ring_buffer_read_page, which will swap 50948789a9e7SSteven Rostedt * the page that was allocated, with the read page of the buffer. 50958789a9e7SSteven Rostedt * 50968789a9e7SSteven Rostedt * Returns: 5097a7e52ad7SSteven Rostedt (VMware) * The page allocated, or ERR_PTR 50988789a9e7SSteven Rostedt */ 509913292494SSteven Rostedt (VMware) void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) 51008789a9e7SSteven Rostedt { 5101a7e52ad7SSteven Rostedt (VMware) struct ring_buffer_per_cpu *cpu_buffer; 510273a757e6SSteven Rostedt (VMware) struct buffer_data_page *bpage = NULL; 510373a757e6SSteven Rostedt (VMware) unsigned long flags; 51047ea59064SVaibhav Nagarnaik struct page *page; 51058789a9e7SSteven Rostedt 5106a7e52ad7SSteven Rostedt (VMware) if (!cpumask_test_cpu(cpu, buffer->cpumask)) 5107a7e52ad7SSteven Rostedt (VMware) return ERR_PTR(-ENODEV); 5108a7e52ad7SSteven Rostedt (VMware) 5109a7e52ad7SSteven Rostedt (VMware) cpu_buffer = buffer->buffers[cpu]; 511073a757e6SSteven Rostedt (VMware) local_irq_save(flags); 511173a757e6SSteven Rostedt (VMware) arch_spin_lock(&cpu_buffer->lock); 511273a757e6SSteven Rostedt (VMware) 511373a757e6SSteven Rostedt (VMware) if (cpu_buffer->free_page) { 511473a757e6SSteven Rostedt (VMware) bpage = cpu_buffer->free_page; 511573a757e6SSteven Rostedt (VMware) cpu_buffer->free_page = NULL; 511673a757e6SSteven Rostedt (VMware) } 511773a757e6SSteven Rostedt (VMware) 511873a757e6SSteven Rostedt (VMware) arch_spin_unlock(&cpu_buffer->lock); 511973a757e6SSteven Rostedt (VMware) local_irq_restore(flags); 512073a757e6SSteven Rostedt (VMware) 512173a757e6SSteven Rostedt (VMware) if (bpage) 512273a757e6SSteven Rostedt (VMware) goto out; 512373a757e6SSteven Rostedt (VMware) 5124d7ec4bfeSVaibhav Nagarnaik page = alloc_pages_node(cpu_to_node(cpu), 5125d7ec4bfeSVaibhav Nagarnaik GFP_KERNEL | __GFP_NORETRY, 0); 51267ea59064SVaibhav Nagarnaik if (!page) 5127a7e52ad7SSteven Rostedt (VMware) return ERR_PTR(-ENOMEM); 51288789a9e7SSteven Rostedt 51297ea59064SVaibhav Nagarnaik bpage = page_address(page); 51308789a9e7SSteven Rostedt 513173a757e6SSteven Rostedt (VMware) out: 5132ef7a4a16SSteven Rostedt rb_init_page(bpage); 5133ef7a4a16SSteven Rostedt 5134044fa782SSteven Rostedt return bpage; 51358789a9e7SSteven Rostedt } 5136d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); 51378789a9e7SSteven Rostedt 51388789a9e7SSteven Rostedt /** 51398789a9e7SSteven Rostedt * ring_buffer_free_read_page - free an allocated read page 51408789a9e7SSteven Rostedt * @buffer: the buffer the page was allocate for 514173a757e6SSteven Rostedt (VMware) * @cpu: the cpu buffer the page came from 51428789a9e7SSteven Rostedt * @data: the page to free 51438789a9e7SSteven Rostedt * 51448789a9e7SSteven Rostedt * Free a page allocated from ring_buffer_alloc_read_page. 51458789a9e7SSteven Rostedt */ 514613292494SSteven Rostedt (VMware) void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data) 51478789a9e7SSteven Rostedt { 514873a757e6SSteven Rostedt (VMware) struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 514973a757e6SSteven Rostedt (VMware) struct buffer_data_page *bpage = data; 5150ae415fa4SSteven Rostedt (VMware) struct page *page = virt_to_page(bpage); 515173a757e6SSteven Rostedt (VMware) unsigned long flags; 515273a757e6SSteven Rostedt (VMware) 5153ae415fa4SSteven Rostedt (VMware) /* If the page is still in use someplace else, we can't reuse it */ 5154ae415fa4SSteven Rostedt (VMware) if (page_ref_count(page) > 1) 5155ae415fa4SSteven Rostedt (VMware) goto out; 5156ae415fa4SSteven Rostedt (VMware) 515773a757e6SSteven Rostedt (VMware) local_irq_save(flags); 515873a757e6SSteven Rostedt (VMware) arch_spin_lock(&cpu_buffer->lock); 515973a757e6SSteven Rostedt (VMware) 516073a757e6SSteven Rostedt (VMware) if (!cpu_buffer->free_page) { 516173a757e6SSteven Rostedt (VMware) cpu_buffer->free_page = bpage; 516273a757e6SSteven Rostedt (VMware) bpage = NULL; 516373a757e6SSteven Rostedt (VMware) } 516473a757e6SSteven Rostedt (VMware) 516573a757e6SSteven Rostedt (VMware) arch_spin_unlock(&cpu_buffer->lock); 516673a757e6SSteven Rostedt (VMware) local_irq_restore(flags); 516773a757e6SSteven Rostedt (VMware) 5168ae415fa4SSteven Rostedt (VMware) out: 516973a757e6SSteven Rostedt (VMware) free_page((unsigned long)bpage); 51708789a9e7SSteven Rostedt } 5171d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); 51728789a9e7SSteven Rostedt 51738789a9e7SSteven Rostedt /** 51748789a9e7SSteven Rostedt * ring_buffer_read_page - extract a page from the ring buffer 51758789a9e7SSteven Rostedt * @buffer: buffer to extract from 51768789a9e7SSteven Rostedt * @data_page: the page to use allocated from ring_buffer_alloc_read_page 5177ef7a4a16SSteven Rostedt * @len: amount to extract 51788789a9e7SSteven Rostedt * @cpu: the cpu of the buffer to extract 51798789a9e7SSteven Rostedt * @full: should the extraction only happen when the page is full. 51808789a9e7SSteven Rostedt * 51818789a9e7SSteven Rostedt * This function will pull out a page from the ring buffer and consume it. 51828789a9e7SSteven Rostedt * @data_page must be the address of the variable that was returned 51838789a9e7SSteven Rostedt * from ring_buffer_alloc_read_page. This is because the page might be used 51848789a9e7SSteven Rostedt * to swap with a page in the ring buffer. 51858789a9e7SSteven Rostedt * 51868789a9e7SSteven Rostedt * for example: 5187d611851bSzhangwei(Jovi) * rpage = ring_buffer_alloc_read_page(buffer, cpu); 5188a7e52ad7SSteven Rostedt (VMware) * if (IS_ERR(rpage)) 5189a7e52ad7SSteven Rostedt (VMware) * return PTR_ERR(rpage); 5190ef7a4a16SSteven Rostedt * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); 5191667d2412SLai Jiangshan * if (ret >= 0) 5192667d2412SLai Jiangshan * process_page(rpage, ret); 51938789a9e7SSteven Rostedt * 51948789a9e7SSteven Rostedt * When @full is set, the function will not return true unless 51958789a9e7SSteven Rostedt * the writer is off the reader page. 51968789a9e7SSteven Rostedt * 51978789a9e7SSteven Rostedt * Note: it is up to the calling functions to handle sleeps and wakeups. 51988789a9e7SSteven Rostedt * The ring buffer can be used anywhere in the kernel and can not 51998789a9e7SSteven Rostedt * blindly call wake_up. The layer that uses the ring buffer must be 52008789a9e7SSteven Rostedt * responsible for that. 52018789a9e7SSteven Rostedt * 52028789a9e7SSteven Rostedt * Returns: 5203667d2412SLai Jiangshan * >=0 if data has been transferred, returns the offset of consumed data. 5204667d2412SLai Jiangshan * <0 if no data has been transferred. 52058789a9e7SSteven Rostedt */ 520613292494SSteven Rostedt (VMware) int ring_buffer_read_page(struct trace_buffer *buffer, 5207ef7a4a16SSteven Rostedt void **data_page, size_t len, int cpu, int full) 52088789a9e7SSteven Rostedt { 52098789a9e7SSteven Rostedt struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 52108789a9e7SSteven Rostedt struct ring_buffer_event *event; 5211044fa782SSteven Rostedt struct buffer_data_page *bpage; 5212ef7a4a16SSteven Rostedt struct buffer_page *reader; 5213ff0ff84aSSteven Rostedt unsigned long missed_events; 52148789a9e7SSteven Rostedt unsigned long flags; 5215ef7a4a16SSteven Rostedt unsigned int commit; 5216667d2412SLai Jiangshan unsigned int read; 52174f3640f8SSteven Rostedt u64 save_timestamp; 5218667d2412SLai Jiangshan int ret = -1; 52198789a9e7SSteven Rostedt 5220554f786eSSteven Rostedt if (!cpumask_test_cpu(cpu, buffer->cpumask)) 5221554f786eSSteven Rostedt goto out; 5222554f786eSSteven Rostedt 5223474d32b6SSteven Rostedt /* 5224474d32b6SSteven Rostedt * If len is not big enough to hold the page header, then 5225474d32b6SSteven Rostedt * we can not copy anything. 5226474d32b6SSteven Rostedt */ 5227474d32b6SSteven Rostedt if (len <= BUF_PAGE_HDR_SIZE) 5228554f786eSSteven Rostedt goto out; 5229474d32b6SSteven Rostedt 5230474d32b6SSteven Rostedt len -= BUF_PAGE_HDR_SIZE; 5231474d32b6SSteven Rostedt 52328789a9e7SSteven Rostedt if (!data_page) 5233554f786eSSteven Rostedt goto out; 52348789a9e7SSteven Rostedt 5235044fa782SSteven Rostedt bpage = *data_page; 5236044fa782SSteven Rostedt if (!bpage) 5237554f786eSSteven Rostedt goto out; 52388789a9e7SSteven Rostedt 52395389f6faSThomas Gleixner raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 52408789a9e7SSteven Rostedt 5241ef7a4a16SSteven Rostedt reader = rb_get_reader_page(cpu_buffer); 5242ef7a4a16SSteven Rostedt if (!reader) 5243554f786eSSteven Rostedt goto out_unlock; 52448789a9e7SSteven Rostedt 5245ef7a4a16SSteven Rostedt event = rb_reader_event(cpu_buffer); 5246667d2412SLai Jiangshan 5247ef7a4a16SSteven Rostedt read = reader->read; 5248ef7a4a16SSteven Rostedt commit = rb_page_commit(reader); 5249ef7a4a16SSteven Rostedt 525066a8cb95SSteven Rostedt /* Check if any events were dropped */ 5251ff0ff84aSSteven Rostedt missed_events = cpu_buffer->lost_events; 525266a8cb95SSteven Rostedt 52538789a9e7SSteven Rostedt /* 5254474d32b6SSteven Rostedt * If this page has been partially read or 5255474d32b6SSteven Rostedt * if len is not big enough to read the rest of the page or 5256474d32b6SSteven Rostedt * a writer is still on the page, then 5257474d32b6SSteven Rostedt * we must copy the data from the page to the buffer. 5258474d32b6SSteven Rostedt * Otherwise, we can simply swap the page with the one passed in. 52598789a9e7SSteven Rostedt */ 5260474d32b6SSteven Rostedt if (read || (len < (commit - read)) || 5261ef7a4a16SSteven Rostedt cpu_buffer->reader_page == cpu_buffer->commit_page) { 5262667d2412SLai Jiangshan struct buffer_data_page *rpage = cpu_buffer->reader_page->page; 5263474d32b6SSteven Rostedt unsigned int rpos = read; 5264474d32b6SSteven Rostedt unsigned int pos = 0; 5265ef7a4a16SSteven Rostedt unsigned int size; 52668789a9e7SSteven Rostedt 52678789a9e7SSteven Rostedt if (full) 5268554f786eSSteven Rostedt goto out_unlock; 52698789a9e7SSteven Rostedt 5270ef7a4a16SSteven Rostedt if (len > (commit - read)) 5271ef7a4a16SSteven Rostedt len = (commit - read); 5272ef7a4a16SSteven Rostedt 527369d1b839SSteven Rostedt /* Always keep the time extend and data together */ 527469d1b839SSteven Rostedt size = rb_event_ts_length(event); 5275ef7a4a16SSteven Rostedt 5276ef7a4a16SSteven Rostedt if (len < size) 5277554f786eSSteven Rostedt goto out_unlock; 5278ef7a4a16SSteven Rostedt 52794f3640f8SSteven Rostedt /* save the current timestamp, since the user will need it */ 52804f3640f8SSteven Rostedt save_timestamp = cpu_buffer->read_stamp; 52814f3640f8SSteven Rostedt 5282ef7a4a16SSteven Rostedt /* Need to copy one event at a time */ 5283ef7a4a16SSteven Rostedt do { 5284e1e35927SDavid Sharp /* We need the size of one event, because 5285e1e35927SDavid Sharp * rb_advance_reader only advances by one event, 5286e1e35927SDavid Sharp * whereas rb_event_ts_length may include the size of 5287e1e35927SDavid Sharp * one or two events. 5288e1e35927SDavid Sharp * We have already ensured there's enough space if this 5289e1e35927SDavid Sharp * is a time extend. */ 5290e1e35927SDavid Sharp size = rb_event_length(event); 5291474d32b6SSteven Rostedt memcpy(bpage->data + pos, rpage->data + rpos, size); 5292ef7a4a16SSteven Rostedt 5293ef7a4a16SSteven Rostedt len -= size; 5294ef7a4a16SSteven Rostedt 5295ef7a4a16SSteven Rostedt rb_advance_reader(cpu_buffer); 5296474d32b6SSteven Rostedt rpos = reader->read; 5297474d32b6SSteven Rostedt pos += size; 5298ef7a4a16SSteven Rostedt 529918fab912SHuang Ying if (rpos >= commit) 530018fab912SHuang Ying break; 530118fab912SHuang Ying 5302ef7a4a16SSteven Rostedt event = rb_reader_event(cpu_buffer); 530369d1b839SSteven Rostedt /* Always keep the time extend and data together */ 530469d1b839SSteven Rostedt size = rb_event_ts_length(event); 5305e1e35927SDavid Sharp } while (len >= size); 5306667d2412SLai Jiangshan 5307667d2412SLai Jiangshan /* update bpage */ 5308ef7a4a16SSteven Rostedt local_set(&bpage->commit, pos); 53094f3640f8SSteven Rostedt bpage->time_stamp = save_timestamp; 5310ef7a4a16SSteven Rostedt 5311474d32b6SSteven Rostedt /* we copied everything to the beginning */ 5312474d32b6SSteven Rostedt read = 0; 53138789a9e7SSteven Rostedt } else { 5314afbab76aSSteven Rostedt /* update the entry counter */ 531577ae365eSSteven Rostedt cpu_buffer->read += rb_page_entries(reader); 5316c64e148aSVaibhav Nagarnaik cpu_buffer->read_bytes += BUF_PAGE_SIZE; 5317afbab76aSSteven Rostedt 53188789a9e7SSteven Rostedt /* swap the pages */ 5319044fa782SSteven Rostedt rb_init_page(bpage); 5320ef7a4a16SSteven Rostedt bpage = reader->page; 5321ef7a4a16SSteven Rostedt reader->page = *data_page; 5322ef7a4a16SSteven Rostedt local_set(&reader->write, 0); 5323778c55d4SSteven Rostedt local_set(&reader->entries, 0); 5324ef7a4a16SSteven Rostedt reader->read = 0; 5325044fa782SSteven Rostedt *data_page = bpage; 5326ff0ff84aSSteven Rostedt 5327ff0ff84aSSteven Rostedt /* 5328ff0ff84aSSteven Rostedt * Use the real_end for the data size, 5329ff0ff84aSSteven Rostedt * This gives us a chance to store the lost events 5330ff0ff84aSSteven Rostedt * on the page. 5331ff0ff84aSSteven Rostedt */ 5332ff0ff84aSSteven Rostedt if (reader->real_end) 5333ff0ff84aSSteven Rostedt local_set(&bpage->commit, reader->real_end); 5334ef7a4a16SSteven Rostedt } 5335ef7a4a16SSteven Rostedt ret = read; 5336ef7a4a16SSteven Rostedt 533766a8cb95SSteven Rostedt cpu_buffer->lost_events = 0; 53382711ca23SSteven Rostedt 53392711ca23SSteven Rostedt commit = local_read(&bpage->commit); 534066a8cb95SSteven Rostedt /* 534166a8cb95SSteven Rostedt * Set a flag in the commit field if we lost events 534266a8cb95SSteven Rostedt */ 5343ff0ff84aSSteven Rostedt if (missed_events) { 5344ff0ff84aSSteven Rostedt /* If there is room at the end of the page to save the 5345ff0ff84aSSteven Rostedt * missed events, then record it there. 5346ff0ff84aSSteven Rostedt */ 5347ff0ff84aSSteven Rostedt if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { 5348ff0ff84aSSteven Rostedt memcpy(&bpage->data[commit], &missed_events, 5349ff0ff84aSSteven Rostedt sizeof(missed_events)); 5350ff0ff84aSSteven Rostedt local_add(RB_MISSED_STORED, &bpage->commit); 53512711ca23SSteven Rostedt commit += sizeof(missed_events); 5352ff0ff84aSSteven Rostedt } 535366a8cb95SSteven Rostedt local_add(RB_MISSED_EVENTS, &bpage->commit); 5354ff0ff84aSSteven Rostedt } 535566a8cb95SSteven Rostedt 53562711ca23SSteven Rostedt /* 53572711ca23SSteven Rostedt * This page may be off to user land. Zero it out here. 53582711ca23SSteven Rostedt */ 53592711ca23SSteven Rostedt if (commit < BUF_PAGE_SIZE) 53602711ca23SSteven Rostedt memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); 53612711ca23SSteven Rostedt 5362554f786eSSteven Rostedt out_unlock: 53635389f6faSThomas Gleixner raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 53648789a9e7SSteven Rostedt 5365554f786eSSteven Rostedt out: 53668789a9e7SSteven Rostedt return ret; 53678789a9e7SSteven Rostedt } 5368d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_read_page); 53698789a9e7SSteven Rostedt 5370b32614c0SSebastian Andrzej Siewior /* 5371b32614c0SSebastian Andrzej Siewior * We only allocate new buffers, never free them if the CPU goes down. 5372b32614c0SSebastian Andrzej Siewior * If we were to free the buffer, then the user would lose any trace that was in 5373b32614c0SSebastian Andrzej Siewior * the buffer. 5374b32614c0SSebastian Andrzej Siewior */ 5375b32614c0SSebastian Andrzej Siewior int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) 5376554f786eSSteven Rostedt { 537713292494SSteven Rostedt (VMware) struct trace_buffer *buffer; 53789b94a8fbSSteven Rostedt (Red Hat) long nr_pages_same; 53799b94a8fbSSteven Rostedt (Red Hat) int cpu_i; 53809b94a8fbSSteven Rostedt (Red Hat) unsigned long nr_pages; 5381554f786eSSteven Rostedt 538213292494SSteven Rostedt (VMware) buffer = container_of(node, struct trace_buffer, node); 53833f237a79SRusty Russell if (cpumask_test_cpu(cpu, buffer->cpumask)) 5384b32614c0SSebastian Andrzej Siewior return 0; 5385554f786eSSteven Rostedt 5386438ced17SVaibhav Nagarnaik nr_pages = 0; 5387438ced17SVaibhav Nagarnaik nr_pages_same = 1; 5388438ced17SVaibhav Nagarnaik /* check if all cpu sizes are same */ 5389438ced17SVaibhav Nagarnaik for_each_buffer_cpu(buffer, cpu_i) { 5390438ced17SVaibhav Nagarnaik /* fill in the size from first enabled cpu */ 5391438ced17SVaibhav Nagarnaik if (nr_pages == 0) 5392438ced17SVaibhav Nagarnaik nr_pages = buffer->buffers[cpu_i]->nr_pages; 5393438ced17SVaibhav Nagarnaik if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { 5394438ced17SVaibhav Nagarnaik nr_pages_same = 0; 5395438ced17SVaibhav Nagarnaik break; 5396438ced17SVaibhav Nagarnaik } 5397438ced17SVaibhav Nagarnaik } 5398438ced17SVaibhav Nagarnaik /* allocate minimum pages, user can later expand it */ 5399438ced17SVaibhav Nagarnaik if (!nr_pages_same) 5400438ced17SVaibhav Nagarnaik nr_pages = 2; 5401554f786eSSteven Rostedt buffer->buffers[cpu] = 5402438ced17SVaibhav Nagarnaik rb_allocate_cpu_buffer(buffer, nr_pages, cpu); 5403554f786eSSteven Rostedt if (!buffer->buffers[cpu]) { 5404b32614c0SSebastian Andrzej Siewior WARN(1, "failed to allocate ring buffer on CPU %u\n", 5405554f786eSSteven Rostedt cpu); 5406b32614c0SSebastian Andrzej Siewior return -ENOMEM; 5407554f786eSSteven Rostedt } 5408554f786eSSteven Rostedt smp_wmb(); 54093f237a79SRusty Russell cpumask_set_cpu(cpu, buffer->cpumask); 5410b32614c0SSebastian Andrzej Siewior return 0; 5411554f786eSSteven Rostedt } 54126c43e554SSteven Rostedt (Red Hat) 54136c43e554SSteven Rostedt (Red Hat) #ifdef CONFIG_RING_BUFFER_STARTUP_TEST 54146c43e554SSteven Rostedt (Red Hat) /* 54156c43e554SSteven Rostedt (Red Hat) * This is a basic integrity check of the ring buffer. 54166c43e554SSteven Rostedt (Red Hat) * Late in the boot cycle this test will run when configured in. 54176c43e554SSteven Rostedt (Red Hat) * It will kick off a thread per CPU that will go into a loop 54186c43e554SSteven Rostedt (Red Hat) * writing to the per cpu ring buffer various sizes of data. 54196c43e554SSteven Rostedt (Red Hat) * Some of the data will be large items, some small. 54206c43e554SSteven Rostedt (Red Hat) * 54216c43e554SSteven Rostedt (Red Hat) * Another thread is created that goes into a spin, sending out 54226c43e554SSteven Rostedt (Red Hat) * IPIs to the other CPUs to also write into the ring buffer. 54236c43e554SSteven Rostedt (Red Hat) * this is to test the nesting ability of the buffer. 54246c43e554SSteven Rostedt (Red Hat) * 54256c43e554SSteven Rostedt (Red Hat) * Basic stats are recorded and reported. If something in the 54266c43e554SSteven Rostedt (Red Hat) * ring buffer should happen that's not expected, a big warning 54276c43e554SSteven Rostedt (Red Hat) * is displayed and all ring buffers are disabled. 54286c43e554SSteven Rostedt (Red Hat) */ 54296c43e554SSteven Rostedt (Red Hat) static struct task_struct *rb_threads[NR_CPUS] __initdata; 54306c43e554SSteven Rostedt (Red Hat) 54316c43e554SSteven Rostedt (Red Hat) struct rb_test_data { 543213292494SSteven Rostedt (VMware) struct trace_buffer *buffer; 54336c43e554SSteven Rostedt (Red Hat) unsigned long events; 54346c43e554SSteven Rostedt (Red Hat) unsigned long bytes_written; 54356c43e554SSteven Rostedt (Red Hat) unsigned long bytes_alloc; 54366c43e554SSteven Rostedt (Red Hat) unsigned long bytes_dropped; 54376c43e554SSteven Rostedt (Red Hat) unsigned long events_nested; 54386c43e554SSteven Rostedt (Red Hat) unsigned long bytes_written_nested; 54396c43e554SSteven Rostedt (Red Hat) unsigned long bytes_alloc_nested; 54406c43e554SSteven Rostedt (Red Hat) unsigned long bytes_dropped_nested; 54416c43e554SSteven Rostedt (Red Hat) int min_size_nested; 54426c43e554SSteven Rostedt (Red Hat) int max_size_nested; 54436c43e554SSteven Rostedt (Red Hat) int max_size; 54446c43e554SSteven Rostedt (Red Hat) int min_size; 54456c43e554SSteven Rostedt (Red Hat) int cpu; 54466c43e554SSteven Rostedt (Red Hat) int cnt; 54476c43e554SSteven Rostedt (Red Hat) }; 54486c43e554SSteven Rostedt (Red Hat) 54496c43e554SSteven Rostedt (Red Hat) static struct rb_test_data rb_data[NR_CPUS] __initdata; 54506c43e554SSteven Rostedt (Red Hat) 54516c43e554SSteven Rostedt (Red Hat) /* 1 meg per cpu */ 54526c43e554SSteven Rostedt (Red Hat) #define RB_TEST_BUFFER_SIZE 1048576 54536c43e554SSteven Rostedt (Red Hat) 54546c43e554SSteven Rostedt (Red Hat) static char rb_string[] __initdata = 54556c43e554SSteven Rostedt (Red Hat) "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\" 54566c43e554SSteven Rostedt (Red Hat) "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890" 54576c43e554SSteven Rostedt (Red Hat) "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv"; 54586c43e554SSteven Rostedt (Red Hat) 54596c43e554SSteven Rostedt (Red Hat) static bool rb_test_started __initdata; 54606c43e554SSteven Rostedt (Red Hat) 54616c43e554SSteven Rostedt (Red Hat) struct rb_item { 54626c43e554SSteven Rostedt (Red Hat) int size; 54636c43e554SSteven Rostedt (Red Hat) char str[]; 54646c43e554SSteven Rostedt (Red Hat) }; 54656c43e554SSteven Rostedt (Red Hat) 54666c43e554SSteven Rostedt (Red Hat) static __init int rb_write_something(struct rb_test_data *data, bool nested) 54676c43e554SSteven Rostedt (Red Hat) { 54686c43e554SSteven Rostedt (Red Hat) struct ring_buffer_event *event; 54696c43e554SSteven Rostedt (Red Hat) struct rb_item *item; 54706c43e554SSteven Rostedt (Red Hat) bool started; 54716c43e554SSteven Rostedt (Red Hat) int event_len; 54726c43e554SSteven Rostedt (Red Hat) int size; 54736c43e554SSteven Rostedt (Red Hat) int len; 54746c43e554SSteven Rostedt (Red Hat) int cnt; 54756c43e554SSteven Rostedt (Red Hat) 54766c43e554SSteven Rostedt (Red Hat) /* Have nested writes different that what is written */ 54776c43e554SSteven Rostedt (Red Hat) cnt = data->cnt + (nested ? 27 : 0); 54786c43e554SSteven Rostedt (Red Hat) 54796c43e554SSteven Rostedt (Red Hat) /* Multiply cnt by ~e, to make some unique increment */ 548040ed29b3SYueHaibing size = (cnt * 68 / 25) % (sizeof(rb_string) - 1); 54816c43e554SSteven Rostedt (Red Hat) 54826c43e554SSteven Rostedt (Red Hat) len = size + sizeof(struct rb_item); 54836c43e554SSteven Rostedt (Red Hat) 54846c43e554SSteven Rostedt (Red Hat) started = rb_test_started; 54856c43e554SSteven Rostedt (Red Hat) /* read rb_test_started before checking buffer enabled */ 54866c43e554SSteven Rostedt (Red Hat) smp_rmb(); 54876c43e554SSteven Rostedt (Red Hat) 54886c43e554SSteven Rostedt (Red Hat) event = ring_buffer_lock_reserve(data->buffer, len); 54896c43e554SSteven Rostedt (Red Hat) if (!event) { 54906c43e554SSteven Rostedt (Red Hat) /* Ignore dropped events before test starts. */ 54916c43e554SSteven Rostedt (Red Hat) if (started) { 54926c43e554SSteven Rostedt (Red Hat) if (nested) 54936c43e554SSteven Rostedt (Red Hat) data->bytes_dropped += len; 54946c43e554SSteven Rostedt (Red Hat) else 54956c43e554SSteven Rostedt (Red Hat) data->bytes_dropped_nested += len; 54966c43e554SSteven Rostedt (Red Hat) } 54976c43e554SSteven Rostedt (Red Hat) return len; 54986c43e554SSteven Rostedt (Red Hat) } 54996c43e554SSteven Rostedt (Red Hat) 55006c43e554SSteven Rostedt (Red Hat) event_len = ring_buffer_event_length(event); 55016c43e554SSteven Rostedt (Red Hat) 55026c43e554SSteven Rostedt (Red Hat) if (RB_WARN_ON(data->buffer, event_len < len)) 55036c43e554SSteven Rostedt (Red Hat) goto out; 55046c43e554SSteven Rostedt (Red Hat) 55056c43e554SSteven Rostedt (Red Hat) item = ring_buffer_event_data(event); 55066c43e554SSteven Rostedt (Red Hat) item->size = size; 55076c43e554SSteven Rostedt (Red Hat) memcpy(item->str, rb_string, size); 55086c43e554SSteven Rostedt (Red Hat) 55096c43e554SSteven Rostedt (Red Hat) if (nested) { 55106c43e554SSteven Rostedt (Red Hat) data->bytes_alloc_nested += event_len; 55116c43e554SSteven Rostedt (Red Hat) data->bytes_written_nested += len; 55126c43e554SSteven Rostedt (Red Hat) data->events_nested++; 55136c43e554SSteven Rostedt (Red Hat) if (!data->min_size_nested || len < data->min_size_nested) 55146c43e554SSteven Rostedt (Red Hat) data->min_size_nested = len; 55156c43e554SSteven Rostedt (Red Hat) if (len > data->max_size_nested) 55166c43e554SSteven Rostedt (Red Hat) data->max_size_nested = len; 55176c43e554SSteven Rostedt (Red Hat) } else { 55186c43e554SSteven Rostedt (Red Hat) data->bytes_alloc += event_len; 55196c43e554SSteven Rostedt (Red Hat) data->bytes_written += len; 55206c43e554SSteven Rostedt (Red Hat) data->events++; 55216c43e554SSteven Rostedt (Red Hat) if (!data->min_size || len < data->min_size) 55226c43e554SSteven Rostedt (Red Hat) data->max_size = len; 55236c43e554SSteven Rostedt (Red Hat) if (len > data->max_size) 55246c43e554SSteven Rostedt (Red Hat) data->max_size = len; 55256c43e554SSteven Rostedt (Red Hat) } 55266c43e554SSteven Rostedt (Red Hat) 55276c43e554SSteven Rostedt (Red Hat) out: 55286c43e554SSteven Rostedt (Red Hat) ring_buffer_unlock_commit(data->buffer, event); 55296c43e554SSteven Rostedt (Red Hat) 55306c43e554SSteven Rostedt (Red Hat) return 0; 55316c43e554SSteven Rostedt (Red Hat) } 55326c43e554SSteven Rostedt (Red Hat) 55336c43e554SSteven Rostedt (Red Hat) static __init int rb_test(void *arg) 55346c43e554SSteven Rostedt (Red Hat) { 55356c43e554SSteven Rostedt (Red Hat) struct rb_test_data *data = arg; 55366c43e554SSteven Rostedt (Red Hat) 55376c43e554SSteven Rostedt (Red Hat) while (!kthread_should_stop()) { 55386c43e554SSteven Rostedt (Red Hat) rb_write_something(data, false); 55396c43e554SSteven Rostedt (Red Hat) data->cnt++; 55406c43e554SSteven Rostedt (Red Hat) 55416c43e554SSteven Rostedt (Red Hat) set_current_state(TASK_INTERRUPTIBLE); 55426c43e554SSteven Rostedt (Red Hat) /* Now sleep between a min of 100-300us and a max of 1ms */ 55436c43e554SSteven Rostedt (Red Hat) usleep_range(((data->cnt % 3) + 1) * 100, 1000); 55446c43e554SSteven Rostedt (Red Hat) } 55456c43e554SSteven Rostedt (Red Hat) 55466c43e554SSteven Rostedt (Red Hat) return 0; 55476c43e554SSteven Rostedt (Red Hat) } 55486c43e554SSteven Rostedt (Red Hat) 55496c43e554SSteven Rostedt (Red Hat) static __init void rb_ipi(void *ignore) 55506c43e554SSteven Rostedt (Red Hat) { 55516c43e554SSteven Rostedt (Red Hat) struct rb_test_data *data; 55526c43e554SSteven Rostedt (Red Hat) int cpu = smp_processor_id(); 55536c43e554SSteven Rostedt (Red Hat) 55546c43e554SSteven Rostedt (Red Hat) data = &rb_data[cpu]; 55556c43e554SSteven Rostedt (Red Hat) rb_write_something(data, true); 55566c43e554SSteven Rostedt (Red Hat) } 55576c43e554SSteven Rostedt (Red Hat) 55586c43e554SSteven Rostedt (Red Hat) static __init int rb_hammer_test(void *arg) 55596c43e554SSteven Rostedt (Red Hat) { 55606c43e554SSteven Rostedt (Red Hat) while (!kthread_should_stop()) { 55616c43e554SSteven Rostedt (Red Hat) 55626c43e554SSteven Rostedt (Red Hat) /* Send an IPI to all cpus to write data! */ 55636c43e554SSteven Rostedt (Red Hat) smp_call_function(rb_ipi, NULL, 1); 55646c43e554SSteven Rostedt (Red Hat) /* No sleep, but for non preempt, let others run */ 55656c43e554SSteven Rostedt (Red Hat) schedule(); 55666c43e554SSteven Rostedt (Red Hat) } 55676c43e554SSteven Rostedt (Red Hat) 55686c43e554SSteven Rostedt (Red Hat) return 0; 55696c43e554SSteven Rostedt (Red Hat) } 55706c43e554SSteven Rostedt (Red Hat) 55716c43e554SSteven Rostedt (Red Hat) static __init int test_ringbuffer(void) 55726c43e554SSteven Rostedt (Red Hat) { 55736c43e554SSteven Rostedt (Red Hat) struct task_struct *rb_hammer; 557413292494SSteven Rostedt (VMware) struct trace_buffer *buffer; 55756c43e554SSteven Rostedt (Red Hat) int cpu; 55766c43e554SSteven Rostedt (Red Hat) int ret = 0; 55776c43e554SSteven Rostedt (Red Hat) 5578a356646aSSteven Rostedt (VMware) if (security_locked_down(LOCKDOWN_TRACEFS)) { 5579ee195452SStephen Rothwell pr_warn("Lockdown is enabled, skipping ring buffer tests\n"); 5580a356646aSSteven Rostedt (VMware) return 0; 5581a356646aSSteven Rostedt (VMware) } 5582a356646aSSteven Rostedt (VMware) 55836c43e554SSteven Rostedt (Red Hat) pr_info("Running ring buffer tests...\n"); 55846c43e554SSteven Rostedt (Red Hat) 55856c43e554SSteven Rostedt (Red Hat) buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE); 55866c43e554SSteven Rostedt (Red Hat) if (WARN_ON(!buffer)) 55876c43e554SSteven Rostedt (Red Hat) return 0; 55886c43e554SSteven Rostedt (Red Hat) 55896c43e554SSteven Rostedt (Red Hat) /* Disable buffer so that threads can't write to it yet */ 55906c43e554SSteven Rostedt (Red Hat) ring_buffer_record_off(buffer); 55916c43e554SSteven Rostedt (Red Hat) 55926c43e554SSteven Rostedt (Red Hat) for_each_online_cpu(cpu) { 55936c43e554SSteven Rostedt (Red Hat) rb_data[cpu].buffer = buffer; 55946c43e554SSteven Rostedt (Red Hat) rb_data[cpu].cpu = cpu; 55956c43e554SSteven Rostedt (Red Hat) rb_data[cpu].cnt = cpu; 55966c43e554SSteven Rostedt (Red Hat) rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], 55976c43e554SSteven Rostedt (Red Hat) "rbtester/%d", cpu); 559862277de7SWei Yongjun if (WARN_ON(IS_ERR(rb_threads[cpu]))) { 55996c43e554SSteven Rostedt (Red Hat) pr_cont("FAILED\n"); 560062277de7SWei Yongjun ret = PTR_ERR(rb_threads[cpu]); 56016c43e554SSteven Rostedt (Red Hat) goto out_free; 56026c43e554SSteven Rostedt (Red Hat) } 56036c43e554SSteven Rostedt (Red Hat) 56046c43e554SSteven Rostedt (Red Hat) kthread_bind(rb_threads[cpu], cpu); 56056c43e554SSteven Rostedt (Red Hat) wake_up_process(rb_threads[cpu]); 56066c43e554SSteven Rostedt (Red Hat) } 56076c43e554SSteven Rostedt (Red Hat) 56086c43e554SSteven Rostedt (Red Hat) /* Now create the rb hammer! */ 56096c43e554SSteven Rostedt (Red Hat) rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); 561062277de7SWei Yongjun if (WARN_ON(IS_ERR(rb_hammer))) { 56116c43e554SSteven Rostedt (Red Hat) pr_cont("FAILED\n"); 561262277de7SWei Yongjun ret = PTR_ERR(rb_hammer); 56136c43e554SSteven Rostedt (Red Hat) goto out_free; 56146c43e554SSteven Rostedt (Red Hat) } 56156c43e554SSteven Rostedt (Red Hat) 56166c43e554SSteven Rostedt (Red Hat) ring_buffer_record_on(buffer); 56176c43e554SSteven Rostedt (Red Hat) /* 56186c43e554SSteven Rostedt (Red Hat) * Show buffer is enabled before setting rb_test_started. 56196c43e554SSteven Rostedt (Red Hat) * Yes there's a small race window where events could be 56206c43e554SSteven Rostedt (Red Hat) * dropped and the thread wont catch it. But when a ring 56216c43e554SSteven Rostedt (Red Hat) * buffer gets enabled, there will always be some kind of 56226c43e554SSteven Rostedt (Red Hat) * delay before other CPUs see it. Thus, we don't care about 56236c43e554SSteven Rostedt (Red Hat) * those dropped events. We care about events dropped after 56246c43e554SSteven Rostedt (Red Hat) * the threads see that the buffer is active. 56256c43e554SSteven Rostedt (Red Hat) */ 56266c43e554SSteven Rostedt (Red Hat) smp_wmb(); 56276c43e554SSteven Rostedt (Red Hat) rb_test_started = true; 56286c43e554SSteven Rostedt (Red Hat) 56296c43e554SSteven Rostedt (Red Hat) set_current_state(TASK_INTERRUPTIBLE); 56306c43e554SSteven Rostedt (Red Hat) /* Just run for 10 seconds */; 56316c43e554SSteven Rostedt (Red Hat) schedule_timeout(10 * HZ); 56326c43e554SSteven Rostedt (Red Hat) 56336c43e554SSteven Rostedt (Red Hat) kthread_stop(rb_hammer); 56346c43e554SSteven Rostedt (Red Hat) 56356c43e554SSteven Rostedt (Red Hat) out_free: 56366c43e554SSteven Rostedt (Red Hat) for_each_online_cpu(cpu) { 56376c43e554SSteven Rostedt (Red Hat) if (!rb_threads[cpu]) 56386c43e554SSteven Rostedt (Red Hat) break; 56396c43e554SSteven Rostedt (Red Hat) kthread_stop(rb_threads[cpu]); 56406c43e554SSteven Rostedt (Red Hat) } 56416c43e554SSteven Rostedt (Red Hat) if (ret) { 56426c43e554SSteven Rostedt (Red Hat) ring_buffer_free(buffer); 56436c43e554SSteven Rostedt (Red Hat) return ret; 56446c43e554SSteven Rostedt (Red Hat) } 56456c43e554SSteven Rostedt (Red Hat) 56466c43e554SSteven Rostedt (Red Hat) /* Report! */ 56476c43e554SSteven Rostedt (Red Hat) pr_info("finished\n"); 56486c43e554SSteven Rostedt (Red Hat) for_each_online_cpu(cpu) { 56496c43e554SSteven Rostedt (Red Hat) struct ring_buffer_event *event; 56506c43e554SSteven Rostedt (Red Hat) struct rb_test_data *data = &rb_data[cpu]; 56516c43e554SSteven Rostedt (Red Hat) struct rb_item *item; 56526c43e554SSteven Rostedt (Red Hat) unsigned long total_events; 56536c43e554SSteven Rostedt (Red Hat) unsigned long total_dropped; 56546c43e554SSteven Rostedt (Red Hat) unsigned long total_written; 56556c43e554SSteven Rostedt (Red Hat) unsigned long total_alloc; 56566c43e554SSteven Rostedt (Red Hat) unsigned long total_read = 0; 56576c43e554SSteven Rostedt (Red Hat) unsigned long total_size = 0; 56586c43e554SSteven Rostedt (Red Hat) unsigned long total_len = 0; 56596c43e554SSteven Rostedt (Red Hat) unsigned long total_lost = 0; 56606c43e554SSteven Rostedt (Red Hat) unsigned long lost; 56616c43e554SSteven Rostedt (Red Hat) int big_event_size; 56626c43e554SSteven Rostedt (Red Hat) int small_event_size; 56636c43e554SSteven Rostedt (Red Hat) 56646c43e554SSteven Rostedt (Red Hat) ret = -1; 56656c43e554SSteven Rostedt (Red Hat) 56666c43e554SSteven Rostedt (Red Hat) total_events = data->events + data->events_nested; 56676c43e554SSteven Rostedt (Red Hat) total_written = data->bytes_written + data->bytes_written_nested; 56686c43e554SSteven Rostedt (Red Hat) total_alloc = data->bytes_alloc + data->bytes_alloc_nested; 56696c43e554SSteven Rostedt (Red Hat) total_dropped = data->bytes_dropped + data->bytes_dropped_nested; 56706c43e554SSteven Rostedt (Red Hat) 56716c43e554SSteven Rostedt (Red Hat) big_event_size = data->max_size + data->max_size_nested; 56726c43e554SSteven Rostedt (Red Hat) small_event_size = data->min_size + data->min_size_nested; 56736c43e554SSteven Rostedt (Red Hat) 56746c43e554SSteven Rostedt (Red Hat) pr_info("CPU %d:\n", cpu); 56756c43e554SSteven Rostedt (Red Hat) pr_info(" events: %ld\n", total_events); 56766c43e554SSteven Rostedt (Red Hat) pr_info(" dropped bytes: %ld\n", total_dropped); 56776c43e554SSteven Rostedt (Red Hat) pr_info(" alloced bytes: %ld\n", total_alloc); 56786c43e554SSteven Rostedt (Red Hat) pr_info(" written bytes: %ld\n", total_written); 56796c43e554SSteven Rostedt (Red Hat) pr_info(" biggest event: %d\n", big_event_size); 56806c43e554SSteven Rostedt (Red Hat) pr_info(" smallest event: %d\n", small_event_size); 56816c43e554SSteven Rostedt (Red Hat) 56826c43e554SSteven Rostedt (Red Hat) if (RB_WARN_ON(buffer, total_dropped)) 56836c43e554SSteven Rostedt (Red Hat) break; 56846c43e554SSteven Rostedt (Red Hat) 56856c43e554SSteven Rostedt (Red Hat) ret = 0; 56866c43e554SSteven Rostedt (Red Hat) 56876c43e554SSteven Rostedt (Red Hat) while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) { 56886c43e554SSteven Rostedt (Red Hat) total_lost += lost; 56896c43e554SSteven Rostedt (Red Hat) item = ring_buffer_event_data(event); 56906c43e554SSteven Rostedt (Red Hat) total_len += ring_buffer_event_length(event); 56916c43e554SSteven Rostedt (Red Hat) total_size += item->size + sizeof(struct rb_item); 56926c43e554SSteven Rostedt (Red Hat) if (memcmp(&item->str[0], rb_string, item->size) != 0) { 56936c43e554SSteven Rostedt (Red Hat) pr_info("FAILED!\n"); 56946c43e554SSteven Rostedt (Red Hat) pr_info("buffer had: %.*s\n", item->size, item->str); 56956c43e554SSteven Rostedt (Red Hat) pr_info("expected: %.*s\n", item->size, rb_string); 56966c43e554SSteven Rostedt (Red Hat) RB_WARN_ON(buffer, 1); 56976c43e554SSteven Rostedt (Red Hat) ret = -1; 56986c43e554SSteven Rostedt (Red Hat) break; 56996c43e554SSteven Rostedt (Red Hat) } 57006c43e554SSteven Rostedt (Red Hat) total_read++; 57016c43e554SSteven Rostedt (Red Hat) } 57026c43e554SSteven Rostedt (Red Hat) if (ret) 57036c43e554SSteven Rostedt (Red Hat) break; 57046c43e554SSteven Rostedt (Red Hat) 57056c43e554SSteven Rostedt (Red Hat) ret = -1; 57066c43e554SSteven Rostedt (Red Hat) 57076c43e554SSteven Rostedt (Red Hat) pr_info(" read events: %ld\n", total_read); 57086c43e554SSteven Rostedt (Red Hat) pr_info(" lost events: %ld\n", total_lost); 57096c43e554SSteven Rostedt (Red Hat) pr_info(" total events: %ld\n", total_lost + total_read); 57106c43e554SSteven Rostedt (Red Hat) pr_info(" recorded len bytes: %ld\n", total_len); 57116c43e554SSteven Rostedt (Red Hat) pr_info(" recorded size bytes: %ld\n", total_size); 57126c43e554SSteven Rostedt (Red Hat) if (total_lost) 57136c43e554SSteven Rostedt (Red Hat) pr_info(" With dropped events, record len and size may not match\n" 57146c43e554SSteven Rostedt (Red Hat) " alloced and written from above\n"); 57156c43e554SSteven Rostedt (Red Hat) if (!total_lost) { 57166c43e554SSteven Rostedt (Red Hat) if (RB_WARN_ON(buffer, total_len != total_alloc || 57176c43e554SSteven Rostedt (Red Hat) total_size != total_written)) 57186c43e554SSteven Rostedt (Red Hat) break; 57196c43e554SSteven Rostedt (Red Hat) } 57206c43e554SSteven Rostedt (Red Hat) if (RB_WARN_ON(buffer, total_lost + total_read != total_events)) 57216c43e554SSteven Rostedt (Red Hat) break; 57226c43e554SSteven Rostedt (Red Hat) 57236c43e554SSteven Rostedt (Red Hat) ret = 0; 57246c43e554SSteven Rostedt (Red Hat) } 57256c43e554SSteven Rostedt (Red Hat) if (!ret) 57266c43e554SSteven Rostedt (Red Hat) pr_info("Ring buffer PASSED!\n"); 57276c43e554SSteven Rostedt (Red Hat) 57286c43e554SSteven Rostedt (Red Hat) ring_buffer_free(buffer); 57296c43e554SSteven Rostedt (Red Hat) return 0; 57306c43e554SSteven Rostedt (Red Hat) } 57316c43e554SSteven Rostedt (Red Hat) 57326c43e554SSteven Rostedt (Red Hat) late_initcall(test_ringbuffer); 57336c43e554SSteven Rostedt (Red Hat) #endif /* CONFIG_RING_BUFFER_STARTUP_TEST */ 5734